From 4f001f2dd25aacdb63f2b0311f395b9d76dcac9f Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 16 May 2024 16:49:39 +0800 Subject: [PATCH 1/9] CP-48625: Code refactoring Add func host_to_vm_count_map to be used, rename RefMap to HostMap Signed-off-by: Gang Ji --- ocaml/xapi/xapi_vm_helpers.ml | 83 ++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 16 deletions(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 2e07c7670cc..4de454970a7 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -35,7 +35,7 @@ module SRSet = Set.Make (struct let compare = Stdlib.compare end) -module RefMap = Map.Make (struct +module HostMap = Map.Make (struct type t = [`host] Ref.t let compare = Ref.compare @@ -997,6 +997,46 @@ let rank_hosts_by_best_vgpu ~__context vgpu visible_hosts = hosts |> List.map (fun g -> List.map fst g) +let host_to_vm_count_map ~__context group = + let host_of_vm vm = + let vm_rec = Db.VM.get_record ~__context ~self:vm in + (* 1. When a VM starts migrating, it's 'scheduled_to_be_resident_on' will be set, + while its 'resident_on' is not cleared. In this case, + 'scheduled_to_be_resident_on' should be treated as its running host. + 2. For paused VM, its 'resident_on' has value, but it will not be considered + while computing the amount of VMs. *) + match + ( vm_rec.API.vM_scheduled_to_be_resident_on + , vm_rec.API.vM_resident_on + , vm_rec.API.vM_power_state + ) + with + | sh, _, _ when sh <> Ref.null -> + Some sh + | _, h, `Running when h <> Ref.null -> + Some h + | _ -> + None + in + Db.VM_group.get_VMs ~__context ~self:group + |> List.fold_left + (fun m vm -> + match host_of_vm vm with + | Some h -> + HostMap.update h + (fun c -> Option.(value ~default:0 c |> succ |> some)) + m + | None -> + m + ) + HostMap.empty + +let rank_hosts_by_vm_cnt_in_group ~__context group hosts = + let host_map = host_to_vm_count_map ~__context group in + Helpers.group_by ~ordering:`ascending + (fun h -> HostMap.find_opt h host_map |> Option.value ~default:0) + hosts + (* Group all hosts to 2 parts: 1. A list of affinity host (only one host). 2. A list of lists, each list contains hosts with the same number of @@ -1065,6 +1105,27 @@ let rank_hosts_by_placement ~__context ~vm ~group = in affinity_host :: sorted_hosts |> List.filter (( <> ) []) +let rec select_host_from_ranked_lists ~vm ~host_selector ~ranked_host_lists = + match ranked_host_lists with + | [] -> + raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) + | hosts :: less_optimal_groups_of_hosts -> ( + debug + "Attempting to select host for VM (%s) in a group of equally optimal \ + hosts [ %s ]" + (Ref.string_of vm) + (String.concat ";" (List.map Ref.string_of hosts)) ; + try host_selector hosts + with _ -> + info + "Failed to select host for VM (%s) in any of [ %s ], continue to \ + select from less optimal hosts" + (Ref.string_of vm) + (String.concat ";" (List.map Ref.string_of hosts)) ; + select_host_from_ranked_lists ~vm ~host_selector + ~ranked_host_lists:less_optimal_groups_of_hosts + ) + (* Selects a single host from the set of all hosts on which the given [vm] can boot. Raises [Api_errors.no_hosts_available] if no such host exists. 
1.Take anti-affinity, or VGPU, or Network SR-IOV as a group_key for group all hosts into host list list @@ -1109,22 +1170,12 @@ let choose_host_for_vm_no_wlb ~__context ~vm ~snapshot = ) ) in - let rec select_host_from = function - | [] -> - raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) - | hosts :: less_optimal_groups_of_hosts -> ( - debug - "Attempting to start VM (%s) on one of equally optimal hosts [ %s ]" - (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; - try Xapi_vm_placement.select_host __context vm validate_host hosts - with _ -> - info "Failed to start VM (%s) on any of [ %s ]" (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; - select_host_from less_optimal_groups_of_hosts - ) + let host_selector = + Xapi_vm_placement.select_host __context vm validate_host in - try select_host_from host_lists + try + select_host_from_ranked_lists ~vm ~host_selector + ~ranked_host_lists:host_lists with | Api_errors.Server_error (x, []) when x = Api_errors.no_hosts_available -> debug From 1234dc71af29ccef50dfd571ea0d291bb80f6c1b Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 16 May 2024 16:59:42 +0800 Subject: [PATCH 2/9] CP-48625: Code refactoring fixup review comments Signed-off-by: Gang Ji --- ocaml/xapi/xapi_vm_helpers.ml | 78 +++++++++++------------------------ 1 file changed, 24 insertions(+), 54 deletions(-) diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 4de454970a7..c25a887b54b 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -1037,6 +1037,13 @@ let rank_hosts_by_vm_cnt_in_group ~__context group hosts = (fun h -> HostMap.find_opt h host_map |> Option.value ~default:0) hosts +let get_affinity_host ~__context ~vm = + match Db.VM.get_affinity ~__context ~self:vm with + | ref when Db.is_valid_ref __context ref -> + Some ref + | _ -> + None + (* Group all hosts to 2 parts: 1. A list of affinity host (only one host). 2. A list of lists, each list contains hosts with the same number of @@ -1051,77 +1058,40 @@ let rank_hosts_by_vm_cnt_in_group ~__context group hosts = ] *) let rank_hosts_by_placement ~__context ~vm ~group = - let affinity_host = - match Db.VM.get_affinity ~__context ~self:vm with - | ref when Db.is_valid_ref __context ref -> - [ref] - | _ -> - [] + let hosts = Db.Host.get_all ~__context in + let affinity_host = get_affinity_host ~__context ~vm in + let hosts_without_affinity = + Option.fold ~none:hosts + ~some:(fun host -> List.filter (( <> ) host) hosts) + affinity_host in let sorted_hosts = - let host_without_affinity_host = - Db.Host.get_all ~__context - |> List.filter (fun host -> not (List.mem host affinity_host)) - in - let host_of_vm vm = - let vm_rec = Db.VM.get_record ~__context ~self:vm in - (* 1. When a VM starts migrating, it's 'scheduled_to_be_resident_on' will be set, - while its 'resident_on' is not cleared. In this case, - 'scheduled_to_be_resident_on' should be treated as its running host. - 2. For paused VM, its 'resident_on' has value, but it will not be considered - while computing the amount of VMs. 
*) - match - ( vm_rec.API.vM_scheduled_to_be_resident_on - , vm_rec.API.vM_resident_on - , vm_rec.API.vM_power_state - ) - with - | sh, _, _ when sh <> Ref.null -> - Some sh - | _, h, `Running when h <> Ref.null -> - Some h - | _ -> - None - in - let host_map = - Db.VM_group.get_VMs ~__context ~self:group - |> List.fold_left - (fun m vm -> - match host_of_vm vm with - | Some h -> - RefMap.update h - (fun c -> Option.(value ~default:0 c |> succ |> some)) - m - | None -> - m - ) - RefMap.empty - in - host_without_affinity_host - |> Helpers.group_by ~ordering:`ascending (fun h -> - RefMap.find_opt h host_map |> Option.value ~default:0 - ) - |> List.map (fun g -> List.map fst g) + hosts_without_affinity + |> rank_hosts_by_vm_cnt_in_group ~__context group + |> List.(map (map fst)) in - affinity_host :: sorted_hosts |> List.filter (( <> ) []) + match affinity_host with + | Some host -> + [host] :: sorted_hosts + | None -> + sorted_hosts let rec select_host_from_ranked_lists ~vm ~host_selector ~ranked_host_lists = match ranked_host_lists with | [] -> raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) | hosts :: less_optimal_groups_of_hosts -> ( + let hosts_str = String.concat ";" (List.map Ref.string_of hosts) in debug "Attempting to select host for VM (%s) in a group of equally optimal \ hosts [ %s ]" - (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; + (Ref.string_of vm) hosts_str ; try host_selector hosts with _ -> info "Failed to select host for VM (%s) in any of [ %s ], continue to \ select from less optimal hosts" - (Ref.string_of vm) - (String.concat ";" (List.map Ref.string_of hosts)) ; + (Ref.string_of vm) hosts_str ; select_host_from_ranked_lists ~vm ~host_selector ~ranked_host_lists:less_optimal_groups_of_hosts ) From 2b742bf9464561e45b5371dbaba10bbdab11edf8 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 23 May 2024 15:56:05 +0800 Subject: [PATCH 3/9] opam: add psq to xapi dependencies Signed-off-by: Gang Ji --- ocaml/xapi/dune | 1 + xapi.opam | 1 + xapi.opam.template | 1 + 3 files changed, 3 insertions(+) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index fe161e0dd5f..b6ec0adf084 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -83,6 +83,7 @@ pam pciutil pci + psq ptime rpclib.core rpclib.json diff --git a/xapi.opam b/xapi.opam index e414d694b2c..387ba542fe6 100644 --- a/xapi.opam +++ b/xapi.opam @@ -38,6 +38,7 @@ depends: [ "ppx_deriving_rpc" "ppx_sexp_conv" "ppx_deriving" + "psq" "rpclib" "rrdd-plugin" "rresult" diff --git a/xapi.opam.template b/xapi.opam.template index dc48554787e..49f3902f66a 100644 --- a/xapi.opam.template +++ b/xapi.opam.template @@ -36,6 +36,7 @@ depends: [ "ppx_deriving_rpc" "ppx_sexp_conv" "ppx_deriving" + "psq" "rpclib" "rrdd-plugin" "rresult" From 4452e5e77cc3d7ecb3e76e1dfb56590c24bcdb05 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 16 May 2024 16:37:06 +0800 Subject: [PATCH 4/9] CP-48625: Anti-affinity support for host evacuation Host evacuation plan with anti-affinity support will be carried out in 3 steps: 1. Try to get a "spread evenly" plan for anti-affinity VMs, done if all the rest VMs got planned using binpack, otherwise continue. 2. Try to get a "no breach" plan for anti-affinity VMs, done if all the rest VMs got planned using binpack, otherwise continue. 3. Carry out a binpack plan ignoring VM anti-affinity. 
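The fallback ordering can be sketched as follows (illustration only; `spread_evenly_plan`, `no_breach_plan` and `binpack_plan` stand in for the planners added in this patch, each assumed to return `Some plan` only when every remaining VM could be placed):

    (* Sketch: try each planner in turn, falling back to plain binpacking. *)
    let plan_evacuation ~spread_evenly_plan ~no_breach_plan ~binpack_plan =
      match spread_evenly_plan () with
      | Some plan ->
          plan (* step 1: anti-affinity VMs spread evenly *)
      | None -> (
        match no_breach_plan () with
        | Some plan ->
            plan (* step 2: anti-affinity groups not breached *)
        | None ->
            binpack_plan () (* step 3: ignore anti-affinity *)
      )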
Signed-off-by: Gang Ji --- ocaml/xapi/xapi_ha_vm_failover.ml | 485 +++++++++++++++++++++++++++--- 1 file changed, 442 insertions(+), 43 deletions(-) diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 4fbf46860f2..9d8ceb0865d 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -175,9 +175,6 @@ let order_f (_, vm_rec) = let ( $ ) x y = x y -(*****************************************************************************************************) -(* Planning code follows *) - (* Compute the total memory required of a VM (Running or not) *) let total_memory_of_vm ~__context policy snapshot = let main, shadow = @@ -185,50 +182,440 @@ let total_memory_of_vm ~__context policy snapshot = in Int64.add main shadow -(** Return a VM -> Host plan for the Host.evacuate code. We assume the VMs are all agile. The returned plan may - be incomplete if there was not enough memory. *) -let compute_evacuation_plan ~__context total_hosts remaining_hosts - vms_and_snapshots = - let hosts = - List.map - (fun host -> - ( host - , Memory_check.host_compute_free_memory_with_maximum_compression - ~__context ~host None - ) - ) - remaining_hosts +let host_free_memory ~__context ~host = + Memory_check.host_compute_free_memory_with_maximum_compression ~__context + ~host None + +let vm_memory ~__context snapshot = + let policy = + match Helpers.check_domain_type snapshot.API.vM_domain_type with + | `hvm | `pv -> + Memory_check.Dynamic_min + | `pv_in_pvh | `pvh -> + Memory_check.Static_max in - let vms = - List.map - (fun (vm, snapshot) -> - let policy = - match Helpers.check_domain_type snapshot.API.vM_domain_type with - | `hvm | `pv -> - Memory_check.Dynamic_min - | `pv_in_pvh | `pvh -> - Memory_check.Static_max - in - (vm, total_memory_of_vm ~__context policy snapshot) - ) - vms_and_snapshots + total_memory_of_vm ~__context policy snapshot + +module VMGroupMap = Map.Make (struct + type t = [`VM_group] Ref.t + + let compare = Ref.compare +end) + +module HostKey = struct + type t = [`host] Ref.t + + let compare = Ref.compare +end + +(* For some VM anti-affinity group, the state of a host which determines + evacuation planning for anti-affinity VMs in that group: + 1. vm_cnt: the number of running VMs in that group resident on the host + 2. h_size: the amount of free memory of the host *) +module AntiAffinityEvacuationPlanHostState = struct + type t = int * int64 + + (* During evacuation planning for anti-affintiy VMs, "vm_cnt" is the first + factor considered, "h_size" is the second factor considered: + Let's say the next anti-affinity VM to be planned belongs to group A, the + host to be selected should be the one which has minimal "vm_cnt" of group + A, for hosts with the same "vm_cnt", pick the one with the minimal + "h_size" which can hold the VM(h_size >= vm_size). *) + let compare (vm_cnt_0, h_size_0) (vm_cnt_1, h_size_1) = + match Int.compare vm_cnt_0 vm_cnt_1 with + | 0 -> + Int64.compare h_size_0 h_size_1 + | c -> + c +end + +(* A Psq of hosts for an anti-affinity group, which is used for evacuation + planning for anti-affinity VMs in that group: the minimal host in this Psq + is the first one to be considered to plan a VM from that group. + When several hosts share the minimal "AntiAffinityEvacuationPlanHostState", + the minimal host is the host with the smallest ref. 
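+   For example (illustrative values): with keys of the form (vm_cnt, h_size),
+   (1, 8 GiB) orders before (2, 1 GiB) because vm_cnt is compared first, and
+   (1, 1 GiB) orders before (1, 8 GiB) because the smaller free memory breaks
+   the tie.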
*) +module AntiAffinityEvacuationPlanHostQueue = + Psq.Make (HostKey) (AntiAffinityEvacuationPlanHostState) + +(* The pool state which determines the spread evenly evacuation planning for + anti-affinity VMs: a VMGroupMap which contains a Psq - + AntiAffinityEvacuationPlanHostQueue for each group, and each Psq contains + all the available hosts in the pool. + Let's say the next anti-affinity VM to be planned belongs to anti-affinity + group A. To get a spread evenly evacuation plan, the most suitable host to + plan the VM would be the host which has the minimal number of running VMs + from group A resident on it, for the hosts with the same number of running + VMs from group A, the one with the minimal free memory will be checked + first, which is just the minimal host returned from + "AntiAffinityEvacuationPlanHostQueue.min" on the Psq of group A. *) +type spread_evenly_plan_pool_state = + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + +(* The pool state which determines the no breach evacuation planning for anti- + affinity VMs: + grp_no_resident_hosts_queue: a VMGroupMap which contains a Psq - + AntiAffinityEvacuationPlanHostQueue for each group, and each Psq contains + "no_resident" hosts for that group. (A "no_resident" host for a group is a + host which has no running VMs from that group resident on it.) + grp_resident_hosts_count_map: a VMGroupMap which contains the number of + "resident" hosts for each group. (A "resident" host for a group is a host + which has at least one running VM from that group resident on it.) + Let's say the next anti-affinity VM to be planned belongs to anti-affinity + group A. If for group A, "resident_hosts_count" is already 2 or + greater than 2, we don't need to plan the VM on any host, if not, we will + need to check the host with the minimal free memory from the no resident + hosts queue, which is just the minimal host returned from + "AntiAffinityEvacuationPlanHostQueue.min" on the "no_resident" hosts queue + of group A. *) +type no_breach_plan_pool_state = { + grp_no_resident_hosts_queue: + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + ; grp_resident_hosts_count_map: int VMGroupMap.t +} + +let init_spread_evenly_plan_pool_state ~__context hosts = + Db.VM_group.get_all ~__context + |> List.filter (fun group -> + Db.VM_group.get_placement ~__context ~self:group = `anti_affinity + ) + |> List.fold_left + (fun grp_to_psq_map grp -> + let host_vm_cnt_map = + Xapi_vm_helpers.host_to_vm_count_map ~__context grp + in + VMGroupMap.add grp + (List.fold_left + (fun q (h, h_size) -> + let vm_cnt = + Xapi_vm_helpers.HostMap.find_opt h host_vm_cnt_map + |> Option.value ~default:0 + in + AntiAffinityEvacuationPlanHostQueue.add h (vm_cnt, h_size) q + ) + AntiAffinityEvacuationPlanHostQueue.empty hosts + ) + grp_to_psq_map + ) + VMGroupMap.empty + +(* Update "spread_evenly_plan_pool_state" after a VM from anti-affinity "group" with + memory size: "vm_size" is planned on the "host": + 1. For the "group", increase "vm_cnt" of the "host" by 1. + 2. For each group, updates the host's size by substracting "vm_size". 
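+   For example (illustrative values): if the "host" entry in the queue of
+   "group" was (1, 10 GiB) and a 2 GiB VM from "group" is planned there, it
+   becomes (2, 8 GiB), while the "host" entry in every other group's queue
+   only changes from (n, 10 GiB) to (n, 8 GiB).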
*) +let update_spread_evenly_plan_pool_state vm_size group host pool_state = + VMGroupMap.mapi + (fun grp hosts_q -> + match grp = group with + | true -> + AntiAffinityEvacuationPlanHostQueue.adjust host + (fun (vm_cnt, h_size) -> (vm_cnt + 1, Int64.sub h_size vm_size)) + hosts_q + | false -> + AntiAffinityEvacuationPlanHostQueue.adjust host + (fun (vm_cnt, h_size) -> (vm_cnt, Int64.sub h_size vm_size)) + hosts_q + ) + pool_state + +let init_no_breach_plan_pool_state spread_evenly_plan_pool_state = + let grp_no_resident_hosts_queue = + spread_evenly_plan_pool_state + |> VMGroupMap.map + (AntiAffinityEvacuationPlanHostQueue.filter + (fun _h (vm_cnt, _h_size) -> vm_cnt = 0 + ) + ) + in + let grp_resident_hosts_count_map = + spread_evenly_plan_pool_state + |> VMGroupMap.map (fun hosts_q -> + hosts_q + |> AntiAffinityEvacuationPlanHostQueue.filter + (fun _h (vm_cnt, _size) -> vm_cnt > 0 + ) + |> AntiAffinityEvacuationPlanHostQueue.size + ) in + {grp_no_resident_hosts_queue; grp_resident_hosts_count_map} + +let update_grp_no_resident_hosts_queue vm_size group host + grp_no_resident_hosts_queue = + VMGroupMap.mapi + (fun grp hosts_q -> + match grp = group with + | true -> + hosts_q |> AntiAffinityEvacuationPlanHostQueue.remove host + | false -> + hosts_q + |> AntiAffinityEvacuationPlanHostQueue.update host + (Option.map (fun (_vm_cnt, h_size) -> + (_vm_cnt, Int64.sub h_size vm_size) + ) + ) + ) + grp_no_resident_hosts_queue + +(* Update "no_breach_plan_pool_state" after a VM from anti-affinity "group" with + memory size: "vm_size" is planned on the "host": + 1. For the "group", the "host" is removed from its "no_resident" hosts queue, and increase its + "resident_hosts_count" by 1. + 2. For other groups, updates the host's size by substracting "vm_size" if the host is in that + group's "no_resident" hosts queue. *) +let update_no_breach_plan_pool_state vm_size group host pool_state = + { + grp_no_resident_hosts_queue= + pool_state.grp_no_resident_hosts_queue + |> update_grp_no_resident_hosts_queue vm_size group host + ; grp_resident_hosts_count_map= + pool_state.grp_resident_hosts_count_map + |> VMGroupMap.update group (Option.map succ) + } + +(*****************************************************************************************************) +(* Planning code follows *) + +let rec select_host_for_spread_evenly_plan vm vm_size hosts_queue = + match AntiAffinityEvacuationPlanHostQueue.min hosts_queue with + | None -> + None + | Some (host, (_vm_cnt, h_size)) -> ( + match vm_size <= h_size with + | true -> + Some host + | _ -> + hosts_queue + |> AntiAffinityEvacuationPlanHostQueue.remove host + |> select_host_for_spread_evenly_plan vm vm_size + ) + +let rec select_host_for_no_breach_plan vm vm_size hosts_queue = + match AntiAffinityEvacuationPlanHostQueue.min hosts_queue with + | Some (host, (0, h_size)) -> ( + match vm_size <= h_size with + | true -> + Some host + | _ -> + hosts_queue + |> AntiAffinityEvacuationPlanHostQueue.remove host + |> select_host_for_no_breach_plan vm vm_size + ) + | _ -> + None + +(* Try to get a spread evenly plan for anti-affinity VMs (for each anti-affinity group, the number of + running VMs from that group are spread evenly in all the rest hosts in the pool): + 1. For all the anti-affinity VMs which sorted in an increasing order in terms of the VM's memory + size, do host selection as below step 2. + 2. 
For each anti-affinity VM, select a host which can run it, and which has the minimal number of + VMs in the same anti-affinity group running on it, for the hosts with the same number of running + VMs in that group, pick the one with the minimal free memory. *) +let rec compute_spread_evenly_plan ~__context pool_state anti_affinity_vms + mapping = + info "compute_spread_evenly_plan" ; + match anti_affinity_vms with + | [] -> + mapping + | (vm, vm_size, group) :: remaining_vms -> ( + debug + "Spread evenly plan: try to plan for the anti-affinity VM (%s %s %s)." + (Ref.string_of vm) + (Db.VM.get_name_label ~__context ~self:vm) + (Db.VM_group.get_name_label ~__context ~self:group) ; + match + VMGroupMap.find group pool_state + |> select_host_for_spread_evenly_plan vm vm_size + with + | None -> + debug + "Spread evenly plan: no host can run this anti-affinity VM. Stop \ + the planning as there won't be a valid plan for this VM." ; + raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) + | Some h -> + debug + "Spread evenly plan: choose the host with the minimal free memory \ + which can run the vm: (%s %s)." + (Ref.string_of h) + (Db.Host.get_name_label ~__context ~self:h) ; + compute_spread_evenly_plan ~__context + (pool_state |> update_spread_evenly_plan_pool_state vm_size group h) + remaining_vms ((vm, h) :: mapping) + ) + +(* Try to get a no breach plan for anti-affinity VMs (for each anti-affinity group, there are at least + 2 hosts having running VMs in the group): + 1. For all the anti-affinity VMs which sorted in an increasing order in terms of the VM's memory + size, do host selection as below step 2. + 2. For each anti-affinity VM, try to select a host for it so that there are at least 2 hosts which + has running VMs in the same anti-affinity group. If there are already 2 hosts having running VMs + in that group, skip planning for the VM. *) +let rec compute_no_breach_plan ~__context total_hosts pool_state + anti_affinity_vms mapping = + info "compute_no_breach_plan" ; + match (total_hosts, anti_affinity_vms) with + | not_enough, _ when not_enough < 3 -> + debug + "There are less than 2 available hosts to migrate VMs to, \ + anti-affinity breach is inevitable" ; + raise (Api_errors.Server_error (Api_errors.no_hosts_available, [])) + | _, [] -> + mapping + | _, (vm, vm_size, group) :: remaining_vms -> + debug "No breach plan: try to plan for VM (%s %s %s)." (Ref.string_of vm) + (Db.VM.get_name_label ~__context ~self:vm) + (Db.VM_group.get_name_label ~__context ~self:group) ; + let no_resident_hosts = + VMGroupMap.find group pool_state.grp_no_resident_hosts_queue + in + + let resident_hosts_cnt = + VMGroupMap.find group pool_state.grp_resident_hosts_count_map + in + info "No breach plan: total_hosts: %d, resident_hosts_cnt: %d." + total_hosts resident_hosts_cnt ; + + if resident_hosts_cnt < 2 then ( + debug + "No breach plan: there are less than 2 hosts has running VM in the \ + same anti-affinity group, and there are still host(s) which has 0 \ + running VMs, try to plan for it." ; + match select_host_for_no_breach_plan vm vm_size no_resident_hosts with + | None -> + info + "No breach plan: failed to select host on any of the no resident \ + hosts, skip it, continue with the next VM." ; + compute_no_breach_plan ~__context total_hosts pool_state + remaining_vms mapping + | Some h -> + debug + "No breach plan: choose the no resident host with the minimal \ + free memory which can run the vm: (%s)." 
+ (Db.Host.get_name_label ~__context ~self:h) ; + compute_no_breach_plan ~__context total_hosts + (pool_state |> update_no_breach_plan_pool_state vm_size group h) + remaining_vms ((vm, h) :: mapping) + ) else ( + debug + "No breach plan: no need to plan for the VM as the number of hosts \ + which has running VMs from the same group is no less than 2, \ + continue to plan for the next one." ; + compute_no_breach_plan ~__context total_hosts pool_state remaining_vms + mapping + ) + +let anti_affinity_vms_increasing ~__context vms = + vms + |> List.filter_map (fun (vm, vm_size) -> + match Xapi_vm_helpers.vm_has_anti_affinity ~__context ~vm with + | Some (`AntiAffinity group) -> + Some (vm, vm_size, group) + | _ -> + None + ) + |> List.sort (fun (_, a, _) (_, b, _) -> compare a b) + +(* Return an evacuation plan respecting VM anti-affinity rules: it is done in 3 phases: + 1. Try to get a "spread evenly" plan for anti-affinity VMs, and then a binpack plan for + the rest of VMs. Done if every VM got planned, otherwise continue. + 2. Try to get a "no breach" plan for anti-affinity VMs, and then a binpack plan for the + rest of VMs. Done if every VM got planned, otherwise continue. + 3. Carry out a binpack plan ignoring VM anti-affinity. *) +let compute_anti_affinity_evacuation_plan ~__context total_hosts hosts vms = let config = {Binpack.hosts; vms; placement= []; total_hosts; num_failures= 1} in Binpack.check_configuration config ; - debug "Planning configuration for offline agile VMs = %s" - (Binpack.string_of_configuration - (fun x -> - Printf.sprintf "%s (%s)" (Ref.short_string_of x) - (Db.Host.get_hostname ~__context ~self:x) - ) - (fun x -> - Printf.sprintf "%s (%s)" (Ref.short_string_of x) - (Db.VM.get_name_label ~__context ~self:x) - ) - config - ) ; + + let anti_affinity_vms = vms |> anti_affinity_vms_increasing ~__context in + + let spread_evenly_plan_pool_state = + init_spread_evenly_plan_pool_state ~__context hosts + in + + let no_breach_plan_pool_state = + init_no_breach_plan_pool_state spread_evenly_plan_pool_state + in + + let binpack_plan ~__context config vms = + debug "Binpack planning configuration = %s" + (Binpack.string_of_configuration + (fun x -> + Printf.sprintf "%s (%s)" (Ref.short_string_of x) + (Db.Host.get_name_label ~__context ~self:x) + ) + (fun x -> + Printf.sprintf "%s (%s)" (Ref.short_string_of x) + (Db.VM.get_name_label ~__context ~self:x) + ) + config + ) ; + debug "VMs to attempt to evacuate: [ %s ]" + (String.concat "; " + (vms + |> List.map (fun (self, _) -> + Printf.sprintf "%s (%s)" (Ref.short_string_of self) + (Db.VM.get_name_label ~__context ~self) + ) + ) + ) ; + let h = Binpack.choose_heuristic config in + h.Binpack.get_specific_plan config (List.map fst vms) + in + + let try_to_binpack_with_plan_applied_first plan = + debug "Try to binpack for the rest VMs" ; + let config_after_plan_applied = Binpack.apply_plan config plan in + let vms_to_plan = + let planned_vm = List.map fst plan in + vms |> List.filter (fun (vm, _) -> not (List.mem vm planned_vm)) + in + let config_after_plan_applied = + {config_after_plan_applied with vms= vms_to_plan} + in + let b_plan = + binpack_plan ~__context config_after_plan_applied vms_to_plan + in + match List.length b_plan = List.length vms_to_plan with + | true -> + debug "Got final plan with plan applied first." ; + Some (plan @ b_plan) + | false -> + debug + "Failed to get final plan as failed to binpack for all the rest VMs." 
; + None + in + + let rec plan_in_steps ~__context plan_steps = + match plan_steps with + | [] -> + debug "Fallback to binpack plan..." ; + binpack_plan ~__context config vms + | plan_func :: plan_funcs -> ( + match plan_func ~__context |> try_to_binpack_with_plan_applied_first with + | exception Api_errors.Server_error (error_code, []) + when error_code = Api_errors.no_hosts_available -> + debug + "Anti-affinity VMs plan failed, continue to try next step \ + planning..." ; + plan_in_steps ~__context plan_funcs + | None -> + debug "Plan failed, continue to try next step planning..." ; + plan_in_steps ~__context plan_funcs + | Some plan -> + debug "Plan OK" ; plan + ) + in + plan_in_steps ~__context + [ + compute_spread_evenly_plan spread_evenly_plan_pool_state anti_affinity_vms + [] + ; compute_no_breach_plan total_hosts no_breach_plan_pool_state + anti_affinity_vms [] + ] + +(** Return a VM -> Host plan for the Host.evacuate code. We assume the VMs are all agile. The returned plan may + be incomplete if there was not enough memory. *) +let compute_evacuation_plan ~__context total_hosts remaining_hosts + vms_and_snapshots = debug "VMs to attempt to evacuate: [ %s ]" (String.concat "; " (List.map @@ -239,8 +626,20 @@ let compute_evacuation_plan ~__context total_hosts remaining_hosts vms_and_snapshots ) ) ; - let h = Binpack.choose_heuristic config in - h.Binpack.get_specific_plan config (List.map fst vms_and_snapshots) + let hosts = + List.map + (fun host -> (host, host_free_memory ~__context ~host)) + remaining_hosts + in + let vms = + List.map + (fun (vm, snapshot) -> (vm, vm_memory ~__context snapshot)) + vms_and_snapshots + in + try compute_anti_affinity_evacuation_plan ~__context total_hosts hosts vms + with Not_found -> + error "Data conrupted during compute_anti_affinity_evacuation_plan." ; + raise (Api_errors.Server_error (Api_errors.internal_error, [])) (** Passed to the planner to reason about other possible configurations, used to block operations which would destroy the HA VM restart plan. 
*) From 99f39dc00bd7be7b4592d22ede6918679f756c00 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 2 Apr 2024 10:51:21 +0800 Subject: [PATCH 5/9] CP-48752: Update UT - add "groups" for VM Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 39 ++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 1c4cf79e6ea..3b4a78ef0a4 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -27,6 +27,8 @@ type vbd = {agile: bool} type vif = {agile: bool} +type group = {name_label: string; placement: string} + type vm = { ha_always_run: bool ; ha_restart_priority: string @@ -34,6 +36,7 @@ type vm = { ; name_label: string ; vbds: vbd list ; vifs: vif list + ; groups: group list } let basic_vm = @@ -44,6 +47,7 @@ let basic_vm = ; name_label= "vm" ; vbds= [{agile= true}] ; vifs= [{agile= true}] + ; groups= [] } type host = {memory_total: int64; name_label: string; vms: vm list} @@ -55,8 +59,13 @@ type pool = { ; cluster: int } -let string_of_vm {memory; name_label; _} = - Printf.sprintf "{memory = %Ld; name_label = %S}" memory name_label +let string_of_group {name_label; placement} = + Printf.sprintf "{name_label = %S; placement = %S}" name_label placement + +let string_of_vm {memory; name_label; groups; _} = + Printf.sprintf "{memory = %Ld; name_label = %S; groups = [%s]}" memory + name_label + (Test_printers.list string_of_group groups) let string_of_host {memory_total; name_label; vms} = Printf.sprintf "{memory_total = %Ld; name_label = %S; vms = [%s]}" @@ -71,6 +80,26 @@ let string_of_pool {master; slaves; ha_host_failures_to_tolerate; cluster} = (Test_printers.list string_of_host slaves) ha_host_failures_to_tolerate cluster +let load_group ~__context ~group = + let placement = + match group.placement with + | "anti_affinity" -> + `anti_affinity + | _ -> + `normal + in + match + Db.VM_group.get_all ~__context + |> List.find_opt (fun g -> + Db.VM_group.get_name_label ~__context ~self:g = group.name_label + && Db.VM_group.get_placement ~__context ~self:g = placement + ) + with + | None -> + make_vm_group ~__context ~name_label:group.name_label ~placement () + | Some g -> + g + let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = let vm_ref = make_vm ~__context ~ha_always_run:vm.ha_always_run @@ -98,6 +127,12 @@ let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = ) vm.vbds in + let groups = + List.fold_left + (fun acc group -> load_group ~__context ~group :: acc) + [] vm.groups + in + Db.VM.set_groups ~__context ~self:vm_ref ~value:groups ; vm_ref let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = From e7126362b8bf783648733fe7913145c51e3ba39f Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 4 Apr 2024 22:11:20 +0800 Subject: [PATCH 6/9] CP-48752: Update UT - vm set_resident_on host Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 3b4a78ef0a4..f2f65fab333 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -145,7 +145,9 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = let (_ : API.ref_VM list) = List.map (fun vm -> - load_vm ~__context ~vm ~local_sr ~shared_sr ~local_net ~shared_net + let vm_ref = load_vm ~__context ~vm ~local_sr ~shared_sr 
~local_net ~shared_net in + Db.VM.set_resident_on ~__context ~self:vm_ref ~value:host_ref ; + vm_ref ) host.vms in From 05b24d5aefc2241db09ad6d1a97bd8806fc668f6 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Thu, 18 Apr 2024 19:39:52 +0800 Subject: [PATCH 7/9] CP-48752: Update UT - add "power_state" for VM Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index f2f65fab333..6cfc580b4b3 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -37,6 +37,7 @@ type vm = { ; vbds: vbd list ; vifs: vif list ; groups: group list + ; power_state: string } let basic_vm = @@ -48,6 +49,7 @@ let basic_vm = ; vbds= [{agile= true}] ; vifs= [{agile= true}] ; groups= [] + ; power_state= "running" } type host = {memory_total: int64; name_label: string; vms: vm list} @@ -133,6 +135,8 @@ let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = [] vm.groups in Db.VM.set_groups ~__context ~self:vm_ref ~value:groups ; + if "running" = vm.power_state then + Db.VM.set_power_state ~__context ~self:vm_ref ~value:`Running ; vm_ref let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = From fca296acbe197692f2e6115957903fe396195e9c Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 29 Apr 2024 15:59:56 +0800 Subject: [PATCH 8/9] CP-48752: Update UT - support testing pool with more than 3 hosts Add "keep_localhost" to enable test pool with more than 3 hosts. Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 61 ++++++++++++++++++------------ 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 6cfc580b4b3..0359d20284d 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -59,8 +59,19 @@ type pool = { ; slaves: host list ; ha_host_failures_to_tolerate: int64 ; cluster: int + ; keep_localhost: bool + (* true to get a Features.Pool_size enabled pool which can have more than 3 hosts *) } +let basic_pool = + { + master= {memory_total= gib 256L; name_label= "master"; vms= []} + ; slaves= [] + ; ha_host_failures_to_tolerate= 0L + ; cluster= 0 + ; keep_localhost= false + } + let string_of_group {name_label; placement} = Printf.sprintf "{name_label = %S; placement = %S}" name_label placement @@ -74,13 +85,14 @@ let string_of_host {memory_total; name_label; vms} = memory_total name_label (Test_printers.list string_of_vm vms) -let string_of_pool {master; slaves; ha_host_failures_to_tolerate; cluster} = +let string_of_pool + {master; slaves; ha_host_failures_to_tolerate; cluster; keep_localhost} = Printf.sprintf "{master = %s; slaves = %s; ha_host_failures_to_tolerate = %Ld; cluster = \ - %d}" + %d; keep_localhost = %B}" (string_of_host master) (Test_printers.list string_of_host slaves) - ha_host_failures_to_tolerate cluster + ha_host_failures_to_tolerate cluster keep_localhost let load_group ~__context ~group = let placement = @@ -157,13 +169,14 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = in host_ref -let setup ~__context {master; slaves; ha_host_failures_to_tolerate; cluster} = +let setup ~__context + {master; slaves; ha_host_failures_to_tolerate; cluster; keep_localhost} = let shared_sr = make_sr ~__context ~shared:true () in let shared_net = make_network ~__context ~bridge:"xenbr0" () in - (* Remove all hosts added by make_test_database *) - 
List.iter - (fun host -> Db.Host.destroy ~__context ~self:host) - (Db.Host.get_all ~__context) ; + if not keep_localhost then (* Remove all hosts added by make_test_database *) + List.iter + (fun host -> Db.Host.destroy ~__context ~self:host) + (Db.Host.get_all ~__context) ; let load_host_and_local_resources host = let local_sr = make_sr ~__context ~shared:false () in let local_net = make_network ~__context ~bridge:"xapi0" () in @@ -225,15 +238,15 @@ module AllProtectedVms = Generic.MakeStateful (struct [ (* No VMs and a single host. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , [] ) ; (* One unprotected VM. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -244,13 +257,12 @@ module AllProtectedVms = Generic.MakeStateful (struct ] } ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , [] ) ; (* One VM which would be protected if it was running. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -258,23 +270,21 @@ module AllProtectedVms = Generic.MakeStateful (struct ; vms= [{basic_vm with ha_always_run= false}] } ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , [] ) ; (* One protected VM. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= [basic_vm]} ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , ["vm"] ) ; (* One protected VM and one unprotected VM. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -291,8 +301,6 @@ module AllProtectedVms = Generic.MakeStateful (struct ] } ; slaves= [] - ; ha_host_failures_to_tolerate= 0L - ; cluster= 0 } , ["vm1"] ) @@ -334,16 +342,17 @@ module PlanForNFailures = Generic.MakeStateful (struct [ (* Two host pool with no VMs. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs ) ; (* Two host pool, with one VM taking up just under half of one host's * memory. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -352,12 +361,12 @@ module PlanForNFailures = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs ) ; (* Two host pool, with two VMs taking up almost all of one host's memory. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -370,12 +379,12 @@ module PlanForNFailures = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs ) ; (* Two host pool, overcommitted. *) ( { + basic_pool with master= { memory_total= gib 256L @@ -399,7 +408,6 @@ module PlanForNFailures = Generic.MakeStateful (struct } ] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , Xapi_ha_vm_failover.No_plan_exists ) @@ -460,6 +468,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct (* 2 host pool, one VM using just under half of one host's memory; * test that another VM can be added. 
*) ( ( { + basic_pool with master= { memory_total= gib 256L @@ -468,7 +477,6 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , { basic_vm with @@ -483,6 +491,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ; (* 2 host pool, two VMs using almost all of one host's memory; * test that another VM cannot be added. *) ( ( { + basic_pool with master= { memory_total= gib 256L @@ -495,7 +504,6 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct } ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , { basic_vm with @@ -513,6 +521,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ; (* 2 host pool which is already overcommitted. Attempting to add another VM * should not throw an exception. *) ( ( { + basic_pool with master= { memory_total= gib 256L @@ -532,7 +541,6 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct } ] ; ha_host_failures_to_tolerate= 1L - ; cluster= 0 } , { basic_vm with @@ -574,6 +582,7 @@ module ComputeMaxFailures = Generic.MakeStateful (struct [ (* Three host pool with no VMs. *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [ @@ -589,6 +598,7 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ) ; (* Two hosts pool with no VMs *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] ; ha_host_failures_to_tolerate= 2L @@ -599,6 +609,7 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ) ; (* Two host pool with one down *) ( { + basic_pool with master= {memory_total= gib 256L; name_label= "master"; vms= []} ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] ; ha_host_failures_to_tolerate= 2L From 96348f1be2a9da5e5cccca006b87640531e5f72e Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Mon, 29 Apr 2024 16:05:11 +0800 Subject: [PATCH 9/9] CP-48752: Add UT for host evacuation with anti-affinity support Signed-off-by: Gang Ji --- ocaml/tests/test_ha_vm_failover.ml | 899 +++++++++++++++++++++++++++-- ocaml/xapi/xapi_ha_vm_failover.mli | 58 ++ 2 files changed, 900 insertions(+), 57 deletions(-) diff --git a/ocaml/tests/test_ha_vm_failover.ml b/ocaml/tests/test_ha_vm_failover.ml index 0359d20284d..163ea0fd402 100644 --- a/ocaml/tests/test_ha_vm_failover.ml +++ b/ocaml/tests/test_ha_vm_failover.ml @@ -27,13 +27,15 @@ type vbd = {agile: bool} type vif = {agile: bool} -type group = {name_label: string; placement: string} +type placement_policy = AntiAffinity | Normal + +type group = {name_label: string; placement: placement_policy} type vm = { ha_always_run: bool ; ha_restart_priority: string ; memory: int64 - ; name_label: string + ; vm_name_label: string ; vbds: vbd list ; vifs: vif list ; groups: group list @@ -45,7 +47,7 @@ let basic_vm = ha_always_run= true ; ha_restart_priority= "restart" ; memory= gib 1L - ; name_label= "vm" + ; vm_name_label= "vm" ; vbds= [{agile= true}] ; vifs= [{agile= true}] ; groups= [] @@ -63,9 +65,62 @@ type pool = { (* true to get a Features.Pool_size enabled pool which can have more than 3 hosts *) } +let master = "master" + +let slave = "slave" + +let slave1 = "slave1" + +let slave2 = "slave2" + +let slave3 = "slave3" + +let grp1 = "grp1" + +let grp2 = "grp2" + +(** vmX_grpY: in test case for 
anti_affinity, the VM is the Xth smallest of slave1's VMs of + the same placement type in terms of VM's memory size, and it belows to VM group: grpY. *) +let vm1_grp1 = "vm1_grp1" + +let vm2_grp1 = "vm2_grp1" + +let vm3_grp1 = "vm3_grp1" + +let vm4_grp1 = "vm4_grp1" + +let vm5_grp1 = "vm5_grp1" + +let vm6_grp1 = "vm6_grp1" + +let vm8_grp1 = "vm8_grp1" + +let vm2_grp2 = "vm2_grp2" + +let vm3_grp2 = "vm3_grp2" + +let vm4_grp2 = "vm4_grp2" + +let vm5_grp2 = "vm5_grp2" + +let vm7_grp2 = "vm7_grp2" + +(** In test case for anti_affinity, it is a VM resident on host other than slave1 *) +let vm_grp1 = "vm_grp1" + +(** vmX: in test case for anti_affinity, it is a VM not in any VM group, and it is the Xth + largest of slave1's VMs not in any VM group in terms of VM's memory size. *) +let vm1 = "vm1" + +let vm2 = "vm2" + +let vm3 = "vm3" + +let vm4 = "vm4" + let basic_pool = { - master= {memory_total= gib 256L; name_label= "master"; vms= []} + master= {memory_total= gib 256L; name_label= master; vms= []} ; slaves= [] ; ha_host_failures_to_tolerate= 0L ; cluster= 0 @@ -73,11 +128,12 @@ let basic_pool = } let string_of_group {name_label; placement} = - Printf.sprintf "{name_label = %S; placement = %S}" name_label placement + Printf.sprintf "{name_label = %S; placement = %S}" name_label + (match placement with AntiAffinity -> "anti_affinity" | Normal -> "normal") -let string_of_vm {memory; name_label; groups; _} = +let string_of_vm {memory; vm_name_label; groups; _} = Printf.sprintf "{memory = %Ld; name_label = %S; groups = [%s]}" memory - name_label + vm_name_label (Test_printers.list string_of_group groups) let string_of_host {memory_total; name_label; vms} = @@ -97,9 +153,9 @@ let string_of_pool let load_group ~__context ~group = let placement = match group.placement with - | "anti_affinity" -> + | AntiAffinity -> `anti_affinity - | _ -> + | Normal -> `normal in match @@ -119,7 +175,7 @@ let load_vm ~__context ~(vm : vm) ~local_sr ~shared_sr ~local_net ~shared_net = make_vm ~__context ~ha_always_run:vm.ha_always_run ~ha_restart_priority:vm.ha_restart_priority ~memory_static_min:vm.memory ~memory_dynamic_min:vm.memory ~memory_dynamic_max:vm.memory - ~memory_static_max:vm.memory ~name_label:vm.name_label () + ~memory_static_max:vm.memory ~name_label:vm.vm_name_label () in let (_ : API.ref_VIF list) = List.mapi @@ -161,7 +217,9 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net = let (_ : API.ref_VM list) = List.map (fun vm -> - let vm_ref = load_vm ~__context ~vm ~local_sr ~shared_sr ~local_net ~shared_net in + let vm_ref = + load_vm ~__context ~vm ~local_sr ~shared_sr ~local_net ~shared_net + in Db.VM.set_resident_on ~__context ~self:vm_ref ~value:host_ref ; vm_ref ) @@ -239,7 +297,7 @@ module AllProtectedVms = Generic.MakeStateful (struct (* No VMs and a single host. 
*) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} + master= {memory_total= gib 256L; name_label= master; vms= []} ; slaves= [] } , [] @@ -250,7 +308,7 @@ module AllProtectedVms = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ {basic_vm with ha_always_run= false; ha_restart_priority= ""} @@ -266,7 +324,7 @@ module AllProtectedVms = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [{basic_vm with ha_always_run= false}] } ; slaves= [] @@ -277,7 +335,7 @@ module AllProtectedVms = Generic.MakeStateful (struct ( { basic_pool with master= - {memory_total= gib 256L; name_label= "master"; vms= [basic_vm]} + {memory_total= gib 256L; name_label= master; vms= [basic_vm]} ; slaves= [] } , ["vm"] @@ -288,21 +346,21 @@ module AllProtectedVms = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with name_label= "vm1"} + {basic_vm with vm_name_label= vm1} ; { basic_vm with ha_always_run= false ; ha_restart_priority= "" - ; name_label= "vm2" + ; vm_name_label= vm2 } ] } ; slaves= [] } - , ["vm1"] + , [vm1] ) ] end) @@ -343,8 +401,8 @@ module PlanForNFailures = Generic.MakeStateful (struct (* Two host pool with no VMs. *) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs @@ -356,10 +414,10 @@ module PlanForNFailures = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" - ; vms= [{basic_vm with memory= gib 120L; name_label= "vm1"}] + ; name_label= master + ; vms= [{basic_vm with memory= gib 120L; vm_name_label= vm1}] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs @@ -370,14 +428,14 @@ module PlanForNFailures = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , Xapi_ha_vm_failover.Plan_exists_for_all_VMs @@ -388,22 +446,22 @@ module PlanForNFailures = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } ; slaves= [ { memory_total= gib 256L - ; name_label= "slave" + ; name_label= slave ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm3"} - ; {basic_vm with memory= gib 120L; name_label= "vm4"} + {basic_vm with memory= gib 120L; vm_name_label= vm3} + ; {basic_vm with memory= gib 120L; 
vm_name_label= vm4} ] } ] @@ -472,10 +530,10 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" - ; vms= [{basic_vm with memory= gib 120L; name_label= "vm1"}] + ; name_label= master + ; vms= [{basic_vm with memory= gib 120L; vm_name_label= vm1}] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , { @@ -483,7 +541,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ha_always_run= false ; ha_restart_priority= "restart" ; memory= gib 120L - ; name_label= "vm2" + ; vm_name_label= vm2 } ) , Ok () @@ -495,14 +553,14 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } - ; slaves= [{memory_total= gib 256L; name_label= "slave"; vms= []}] + ; slaves= [{memory_total= gib 256L; name_label= slave; vms= []}] ; ha_host_failures_to_tolerate= 1L } , { @@ -510,7 +568,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ha_always_run= false ; ha_restart_priority= "restart" ; memory= gib 120L - ; name_label= "vm2" + ; vm_name_label= vm2 } ) , Error @@ -525,19 +583,19 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct master= { memory_total= gib 256L - ; name_label= "master" + ; name_label= master ; vms= [ - {basic_vm with memory= gib 120L; name_label= "vm1"} - ; {basic_vm with memory= gib 120L; name_label= "vm2"} + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} ] } ; slaves= [ { memory_total= gib 256L - ; name_label= "slave" - ; vms= [{basic_vm with memory= gib 120L; name_label= "vm1"}] + ; name_label= slave + ; vms= [{basic_vm with memory= gib 120L; vm_name_label= vm1}] } ] ; ha_host_failures_to_tolerate= 1L @@ -547,7 +605,7 @@ module AssertNewVMPreservesHAPlan = Generic.MakeStateful (struct ha_always_run= false ; ha_restart_priority= "restart" ; memory= gib 120L - ; name_label= "vm2" + ; vm_name_label= vm2 } ) , Ok () @@ -583,11 +641,11 @@ module ComputeMaxFailures = Generic.MakeStateful (struct (* Three host pool with no VMs. 
*) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} + master= {memory_total= gib 256L; name_label= master; vms= []} ; slaves= [ - {memory_total= gib 256L; name_label= "slave1"; vms= []} - ; {memory_total= gib 256L; name_label= "slave2"; vms= []} + {memory_total= gib 256L; name_label= slave1; vms= []} + ; {memory_total= gib 256L; name_label= slave2; vms= []} ] ; (* Placeholder value that is overridden when we call the compute function *) ha_host_failures_to_tolerate= 3L @@ -599,8 +657,8 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ; (* Two hosts pool with no VMs *) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} - ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= [{memory_total= gib 256L; name_label= slave1; vms= []}] ; ha_host_failures_to_tolerate= 2L ; cluster= 2 } @@ -610,8 +668,8 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ; (* Two host pool with one down *) ( { basic_pool with - master= {memory_total= gib 256L; name_label= "master"; vms= []} - ; slaves= [{memory_total= gib 256L; name_label= "slave1"; vms= []}] + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= [{memory_total= gib 256L; name_label= slave1; vms= []}] ; ha_host_failures_to_tolerate= 2L ; cluster= 1 } @@ -621,4 +679,731 @@ module ComputeMaxFailures = Generic.MakeStateful (struct ] end) -let tests = [("plan_for_n_failures", PlanForNFailures.tests)] +let extract_output_for_anti_affinity_plan __context anti_affinity_plan = + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let slave1_anti_affinity_vms = + Db.Host.get_resident_VMs ~__context ~self:slv1 + |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self)) + |> List.filter (fun (_, record) -> not record.API.vM_is_control_domain) + |> List.map (fun (self, record) -> + (self, Xapi_ha_vm_failover.vm_memory ~__context record) + ) + |> Xapi_ha_vm_failover.anti_affinity_vms_increasing ~__context + in + try + anti_affinity_plan ~__context slave1_anti_affinity_vms [] + |> List.map (fun (vm, host) -> + ( Db.VM.get_name_label ~__context ~self:vm + , Db.Host.get_name_label ~__context ~self:host + ) + ) + with Api_errors.Server_error ("NO_HOSTS_AVAILABLE", []) as e -> + [("Anti-affinity VMs plan failed", Printexc.to_string e)] + +let anti_affinity_grp1 = {name_label= grp1; placement= AntiAffinity} + +let anti_affinity_plan_test_cases = + [ + (* Test 0: No VMs in slave1 to be evacuated. 
*) + ( { + basic_pool with + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= + [ + {memory_total= gib 256L; name_label= slave1; vms= []} + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [] + , (* Assert that no_breach_plan returns as expected *) + [] + ) + ; (* Test 1: No anti-affinity VMs in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 256L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + {basic_vm with memory= gib 120L; vm_name_label= vm1} + ; {basic_vm with memory= gib 120L; vm_name_label= vm2} + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [] + , (* Assert that no_breach_plan returns as expected *) + [] + ) + ; (* Test 2: One anti-affinity VM in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; {basic_vm with memory= gib 120L; vm_name_label= vm1} + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [(vm1_grp1, slave2)] + , (* Assert that no_breach_plan returns as expected *) + [(vm1_grp1, slave2)] + ) + ; (* Test 3: One anti-affinity VM in slave1 to be evacuated, the smallest host already has anti-affinity VM in the same group *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; {basic_vm with memory= gib 120L; vm_name_label= "vm2"} + ] + } + ; { + memory_total= gib 256L + ; name_label= slave2 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [(vm1_grp1, master)] + , (* Assert that no_breach_plan returns as expected *) + [(vm1_grp1, master)] + ) + ; (* Test 4: Two anti-affinity VMs belong to one group in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [(vm2_grp1, master); (vm1_grp1, slave2)] + , (* Assert that no_breach_plan returns as expected *) + [(vm2_grp1, master); (vm1_grp1, slave2)] + ) + ; (* Test 5: Two anti-affinity VMs belong to one group in slave1 to be evacuated, only 1 can be planed *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 513L + ; 
vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + ( "Anti-affinity VMs plan failed" + , "Server_error(NO_HOSTS_AVAILABLE, [ ])" + ) + ] + , (* Assert that no_breach_plan returns as expected *) + [(vm1_grp1, slave2)] + ) + ; (* Test 6: 6 anti-affinity VMs belong to one group in slave1 to be evacuated, only 5 can be planned *) + ( { + basic_pool with + master= {memory_total= gib 640L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 60L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 400L + ; vm_name_label= vm6_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 250L + ; vm_name_label= vm4_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 260L + ; vm_name_label= vm5_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + ( "Anti-affinity VMs plan failed" + , "Server_error(NO_HOSTS_AVAILABLE, [ ])" + ) + ] + , (* Assert that no_breach_plan returns as expected *) + [(vm2_grp1, master); (vm1_grp1, slave2)] + ) + ; (* Test 7: Two groups anti-affinity VMs in slave1 to be evacuated *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm6_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 60L + ; vm_name_label= vm5_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm7_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 1L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 2L + ; vm_name_label= vm2_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 3L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 4L + ; vm_name_label= vm4_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ] + } + ; {memory_total= gib 256L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + (vm7_grp2, master) + ; (vm6_grp1, slave2) + ; (vm5_grp2, slave2) + ; (vm4_grp2, master) + ; (vm3_grp1, master) + ; (vm2_grp2, slave2) + ; (vm1_grp1, slave2) + ] + , (* Assert that no_breach_plan returns as expected *) + [ + (vm4_grp2, master) + ; (vm3_grp1, master) + ; (vm2_grp2, slave2) + ; (vm1_grp1, slave2) + ] + ) + ; (* Test 8: Two groups anti-affinity VMs in slave1 to be evacuated, master is bigger than slave2 in size when started, but becomes smaller during planning *) + ( { + basic_pool with + master= {memory_total= gib 512L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 120L + ; vm_name_label= vm6_grp1 + ; groups= 
[anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 60L + ; vm_name_label= vm5_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 130L + ; vm_name_label= vm7_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 1L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 6L + ; vm_name_label= vm3_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ; { + basic_vm with + memory= gib 5L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 7L + ; vm_name_label= vm4_grp2 + ; groups= [{name_label= grp2; placement= AntiAffinity}] + } + ] + } + ; {memory_total= gib 510L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan returns as expected *) + [ + (vm7_grp2, slave2) + ; (vm6_grp1, master) + ; (vm5_grp2, master) + ; (vm4_grp2, slave2) + ; (vm3_grp2, master) + ; (vm2_grp1, master) + ; (vm1_grp1, slave2) + ] + , (* Assert that no_breach_plan returns as expected *) + [ + (vm4_grp2, slave2) + ; (vm3_grp2, master) + ; (vm2_grp1, master) + ; (vm1_grp1, slave2) + ] + ) + ] + +module Slave1EvacuationVMAntiAffinitySpreadEvenlyPlan = +Generic.MakeStateful (struct + module Io = struct + type input_t = pool + + type output_t = (string * string) list + + let string_of_input_t = string_of_pool + + let string_of_output_t = Test_printers.(list (pair string string)) + end + + module State = Test_state.XapiDb + + let load_input __context = setup ~__context + + let extract_output __context _ = + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + h <> slv1 + && Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + |> List.map (fun host -> + (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host) + ) + in + let pool_state = + Xapi_ha_vm_failover.init_spread_evenly_plan_pool_state ~__context hosts + in + extract_output_for_anti_affinity_plan __context + (Xapi_ha_vm_failover.compute_spread_evenly_plan pool_state) + + let tests = + `QuickAndAutoDocumented + (anti_affinity_plan_test_cases + |> List.map (fun (pool, spread_evenly_plan, _no_breach_plan) -> + (pool, spread_evenly_plan) + ) + ) +end) + +module Slave1EvacuationVMAntiAffinityNoBreachPlan = Generic.MakeStateful (struct + module Io = struct + type input_t = pool + + type output_t = (string * string) list + + let string_of_input_t = string_of_pool + + let string_of_output_t = Test_printers.(list (pair string string)) + end + + module State = Test_state.XapiDb + + let load_input __context = setup ~__context + + let extract_output __context _ = + let total_hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + |> List.length + in + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + h <> slv1 + && Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + |> List.map (fun host -> + (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host) + ) + in + let pool_state = + Xapi_ha_vm_failover.init_spread_evenly_plan_pool_state ~__context hosts + |> Xapi_ha_vm_failover.init_no_breach_plan_pool_state + in + extract_output_for_anti_affinity_plan __context + 
(Xapi_ha_vm_failover.compute_no_breach_plan total_hosts pool_state) + + let tests = + `QuickAndAutoDocumented + (anti_affinity_plan_test_cases + |> List.map (fun (pool, _spread_evenly_plan, no_breach_plan) -> + (pool, no_breach_plan) + ) + ) +end) + +module Slave1EvacuationPlan = Generic.MakeStateful (struct + module Io = struct + type input_t = pool + + type output_t = (string * string) list + + let string_of_input_t = string_of_pool + + let string_of_output_t = Test_printers.(list (pair string string)) + end + + module State = Test_state.XapiDb + + let load_input __context = setup ~__context + + let extract_output __context _ = + let all_hosts = + Db.Host.get_all ~__context + |> List.filter (fun h -> + Db.Host.get_name_label ~__context ~self:h <> "localhost" + ) + in + let slv1 = + Db.Host.get_all ~__context + |> List.find (fun self -> Db.Host.get_name_label ~__context ~self = slave1) + in + let slave1_vms = + Db.Host.get_resident_VMs ~__context ~self:slv1 + |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self)) + |> List.filter (fun (_, record) -> not record.API.vM_is_control_domain) + |> List.map (fun (self, record) -> + (self, Xapi_ha_vm_failover.vm_memory ~__context record) + ) + in + let hosts = + all_hosts + |> List.filter (( <> ) slv1) + |> List.map (fun host -> + (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host) + ) + in + Xapi_ha_vm_failover.compute_anti_affinity_evacuation_plan ~__context + (List.length all_hosts) hosts slave1_vms + |> List.map (fun (vm, host) -> + ( Db.VM.get_name_label ~__context ~self:vm + , Db.Host.get_name_label ~__context ~self:host + ) + ) + + let tests = + `QuickAndAutoDocumented + [ + (* Test 0: Spread evenly plan is taken. *) + ( { + basic_pool with + master= {memory_total= gib 200L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + { + basic_vm with + memory= gib 24L + ; vm_name_label= vm4_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 23L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 22L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 21L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 60L; name_label= slave2; vms= []} + ] + } + , (* Assert that spread_evenly_plan is taken. *) + [ + (vm4_grp1, master) + ; (vm3_grp1, slave2) + ; (vm2_grp1, master) + ; (vm1_grp1, slave2) + ] + ) + (* Test 1: No breach plan is taken. *) + ; ( { + basic_pool with + master= {memory_total= gib 100L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + {basic_vm with memory= gib 85L; vm_name_label= vm1} + ; {basic_vm with memory= gib 65L; vm_name_label= vm2} + ; { + basic_vm with + memory= gib 30L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 20L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 10L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 90L; name_label= slave2; vms= []} + ; {memory_total= gib 70L; name_label= slave3; vms= []} + ] + ; keep_localhost= true + } + , (* Assert that no-breach-plan is taken *) + [ + (vm2_grp1, slave2) + ; (vm1_grp1, slave3) + ; (vm3_grp1, slave3) + ; (vm2, slave2) + ; (vm1, master) + ] + ) + (* Test 2: Fallback to binpack plan. 
*) + ; ( { + basic_pool with + master= {memory_total= gib 100L; name_label= master; vms= []} + ; slaves= + [ + { + memory_total= gib 256L + ; name_label= slave1 + ; vms= + [ + {basic_vm with memory= gib 95L; vm_name_label= vm1} + ; {basic_vm with memory= gib 75L; vm_name_label= vm2} + ; { + basic_vm with + memory= gib 30L + ; vm_name_label= vm3_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 20L + ; vm_name_label= vm2_grp1 + ; groups= [anti_affinity_grp1] + } + ; { + basic_vm with + memory= gib 10L + ; vm_name_label= vm1_grp1 + ; groups= [anti_affinity_grp1] + } + ] + } + ; {memory_total= gib 80L; name_label= slave2; vms= []} + ; {memory_total= gib 70L; name_label= slave3; vms= []} + ] + ; keep_localhost= true + } + , (* Assert that binpack-plan is taken *) + [ + (vm1_grp1, slave3) + ; (vm2_grp1, slave3) + ; (vm3_grp1, slave3) + ; (vm2, slave2) + ; (vm1, master) + ] + ) + ] +end) + +let tests = + [ + ("plan_for_n_failures", PlanForNFailures.tests) + ; ( "anti-affinity spread evenly plan" + , Slave1EvacuationVMAntiAffinitySpreadEvenlyPlan.tests + ) + ; ( "anti-affinity no breach plan" + , Slave1EvacuationVMAntiAffinityNoBreachPlan.tests + ) + ; ( "3 phases plan: spread evenly plan, no breach plan, binpacking plan" + , Slave1EvacuationPlan.tests + ) + ] diff --git a/ocaml/xapi/xapi_ha_vm_failover.mli b/ocaml/xapi/xapi_ha_vm_failover.mli index 89a4c3d20e5..52e2382164f 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.mli +++ b/ocaml/xapi/xapi_ha_vm_failover.mli @@ -86,3 +86,61 @@ val assert_nfailures_change_preserves_ha_plan : __context:Context.t -> int -> unit val assert_new_vm_preserves_ha_plan : __context:Context.t -> API.ref_VM -> unit + +(* Below exposed only for ease of testing *) + +module VMGroupMap : Map.S with type key = [`VM_group] Ref.t + +module HostKey : sig + type t = [`host] Ref.t +end + +module AntiAffinityEvacuationPlanHostQueue : Psq.S with type k = HostKey.t + +type spread_evenly_plan_pool_state = + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + +type no_breach_plan_pool_state = { + grp_no_resident_hosts_queue: + AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + ; grp_resident_hosts_count_map: int VMGroupMap.t +} + +val compute_spread_evenly_plan : + __context:Context.t + -> AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + -> (API.ref_VM * int64 * API.ref_VM_group) list + -> (API.ref_VM * API.ref_host) list + -> (API.ref_VM * API.ref_host) list + +val compute_no_breach_plan : + __context:Context.t + -> int + -> no_breach_plan_pool_state + -> (API.ref_VM * int64 * API.ref_VM_group) list + -> (API.ref_VM * API.ref_host) list + -> (API.ref_VM * API.ref_host) list + +val compute_anti_affinity_evacuation_plan : + __context:Context.t + -> int + -> (API.ref_host * int64) list + -> (API.ref_VM * int64) list + -> (API.ref_VM * API.ref_host) list + +val host_free_memory : __context:Context.t -> host:[`host] Ref.t -> int64 + +val vm_memory : __context:Context.t -> API.vM_t -> int64 + +val anti_affinity_vms_increasing : + __context:Context.t + -> (API.ref_VM * 'a) list + -> (API.ref_VM * 'a * API.ref_VM_group) list + +val init_spread_evenly_plan_pool_state : + __context:Context.t + -> (API.ref_host * int64) list + -> AntiAffinityEvacuationPlanHostQueue.t VMGroupMap.t + +val init_no_breach_plan_pool_state : + spread_evenly_plan_pool_state -> no_breach_plan_pool_state
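
For orientation, the values exported at the end of xapi_ha_vm_failover.mli are driven by
the Slave1EvacuationPlan test above in a fixed order: build a (host, free memory) list for
the candidate hosts and a (VM, required memory) list for the host being evacuated, then
hand both to compute_anti_affinity_evacuation_plan, which (per the test labels) tries the
spread-evenly plan first, falls back to the no-breach plan, and finally to plain
binpacking. Below is a minimal sketch of that composition, not part of the patch; the
function name evacuation_plan_sketch and the host_to_evacuate parameter are illustrative
only, and the Db calls mirror the ones already used by the tests:

  (* Sketch: compute an anti-affinity-aware evacuation plan for one host,
     following the same steps as Slave1EvacuationPlan.extract_output. *)
  let evacuation_plan_sketch ~__context ~host_to_evacuate =
    let all_hosts = Db.Host.get_all ~__context in
    (* VMs resident on the host being evacuated, paired with the memory each needs;
       control domains are filtered out, as in the tests. *)
    let vms =
      Db.Host.get_resident_VMs ~__context ~self:host_to_evacuate
      |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self))
      |> List.filter (fun (_, r) -> not r.API.vM_is_control_domain)
      |> List.map (fun (self, r) ->
             (self, Xapi_ha_vm_failover.vm_memory ~__context r)
         )
    in
    (* Candidate hosts (everything except the host being evacuated) paired with
       their free memory. *)
    let hosts =
      all_hosts
      |> List.filter (( <> ) host_to_evacuate)
      |> List.map (fun host ->
             (host, Xapi_ha_vm_failover.host_free_memory ~__context ~host)
         )
    in
    (* Three-phase plan: spread evenly, then no breach, then binpacking. *)
    Xapi_ha_vm_failover.compute_anti_affinity_evacuation_plan ~__context
      (List.length all_hosts) hosts vms

As the "(* Below exposed only for ease of testing *)" comment states, these exports exist
so the per-phase entry points (init_spread_evenly_plan_pool_state, compute_spread_evenly_plan,
init_no_breach_plan_pool_state, compute_no_breach_plan) can be exercised in isolation by the
Slave1EvacuationVMAntiAffinity* modules; the sketch mirrors how the tests drive them rather
than describing a supported public API.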