|
10 | 10 | def add_options(p):
|
11 | 11 | p.add_argument('--max-replicating-pdisks', type=int, help='Limit number of maximum replicating PDisks in the cluster')
|
12 | 12 | p.add_argument('--only-from-overpopulated-pdisks', action='store_true', help='Move vdisks out only from pdisks with over expected slot count')
|
| 13 | + p.add_argument('--sort-by', choices=['slots', 'space_ratio'], default='slots', help='Reassign vdisks first from pdisks with the most used slots or with the highest space usage ratio') |
| 14 | + p.add_argument('--storage-pool', type=str, help='Storage pool to balance') |
| 15 | + p.add_argument('--max-donors-per-pdisk', type=int, default=0, help='Limit number of donors per pdisk') |
13 | 16 | common.add_basic_format_options(p)
|
14 | 17 |
|
15 | 18 |
|
| 19 | +def build_pdisk_statistics(base_config, pdisk_map, vslots): |
| 20 | + pdisks_statistics = { |
| 21 | + pdisk_id: { |
| 22 | + "PDiskId": pdisk_id, |
| 23 | + "AvailableSize": pdisk.PDiskMetrics.AvailableSize, |
| 24 | + "TotalSize": pdisk.PDiskMetrics.TotalSize, |
| 25 | + "CandidateVSlots": [], |
| 26 | + "DonorVSlots": [], |
| 27 | + } |
| 28 | + for pdisk_id, pdisk in pdisk_map.items() |
| 29 | + if pdisk.PDiskMetrics.TotalSize > 0 # pdisk works |
| 30 | + } |
| 31 | + for vslot in vslots: |
| 32 | + pdisk_id = common.get_pdisk_id(vslot.VSlotId) |
| 33 | + pdisks_statistics[pdisk_id]["CandidateVSlots"].append(vslot) |
| 34 | + for vslot in base_config.VSlot: |
| 35 | + for donor in vslot.Donors: |
| 36 | + pdisk_id = common.get_pdisk_id(donor.VSlotId) |
| 37 | + pdisks_statistics[pdisk_id]["DonorVSlots"].append(donor) |
| 38 | + return pdisks_statistics |
| 39 | + |
| 40 | + |
16 | 41 | def do(args):
|
17 | 42 | while True:
|
18 | 43 | common.flush_cache()
|
19 | 44 |
|
20 | 45 | base_config = common.fetch_base_config()
|
| 46 | + storage_pools = common.fetch_storage_pools() |
21 | 47 | node_mon_map = common.fetch_node_mon_map({vslot.VSlotId.NodeId for vslot in base_config.VSlot})
|
22 | 48 | vslot_map = common.build_vslot_map(base_config)
|
23 | 49 | pdisk_map = common.build_pdisk_map(base_config)
|
24 | 50 | pdisk_usage = common.build_pdisk_usage_map(base_config, count_donors=False)
|
25 | 51 | pdisk_usage_w_donors = common.build_pdisk_usage_map(base_config, count_donors=True)
|
26 | 52 |
|
| 53 | + storage_pool_names_map = common.build_storage_pool_names_map(storage_pools) |
| 54 | + group_id_to_storage_pool_name_map = { |
| 55 | + group_id: storage_pool_names_map[(group.BoxId, group.StoragePoolId)] |
| 56 | + for group_id, group in common.build_group_map(base_config).items() |
| 57 | + if (group.BoxId, group.StoragePoolId) != (0, 0) # static group |
| 58 | + } |
| 59 | + |
27 | 60 | vdisks_groups_count_map = defaultdict(int)
|
28 | 61 | for group in base_config.Group:
|
29 | 62 | num = sum(vslot.Status == 'READY' for vslot in common.vslots_of_group(group, vslot_map)) - len(group.VSlotId)
|
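A minimal standalone sketch of how the three options added in add_options above behave once parsed. Only argparse is used here; common.add_basic_format_options and the pre-existing options are omitted, and the pool name 'ssd' is just an arbitrary example value:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--sort-by', choices=['slots', 'space_ratio'], default='slots')
p.add_argument('--storage-pool', type=str)
p.add_argument('--max-donors-per-pdisk', type=int, default=0)

# Defaults: sort_by='slots', storage_pool=None (no pool filter),
# max_donors_per_pdisk=0 (donor limit disabled), matching the checks in do().
print(p.parse_args([]))
print(p.parse_args(['--sort-by', 'space_ratio', '--storage-pool', 'ssd', '--max-donors-per-pdisk', '2']))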
@@ -82,12 +115,26 @@ def do(args):
|
82 | 115 |
|
83 | 116 | candidate_vslots = []
|
84 | 117 | if healthy_vslots_from_overpopulated_pdisks:
|
85 | | - common.print_if_not_quiet(args, f'Found {len(healthy_vslots_from_overpopulated_pdisks)} vdisks from overpopulated pdisks', sys.stdout) |
| 118 | + common.print_if_not_quiet(args, f'Found {len(healthy_vslots_from_overpopulated_pdisks)} vdisks in healthy groups from overpopulated pdisks', sys.stdout) |
86 | 119 | candidate_vslots = healthy_vslots_from_overpopulated_pdisks
|
87 | 120 | elif healthy_vslots and not args.only_from_overpopulated_pdisks:
|
88 | | - common.print_if_not_quiet(args, f'Found {len(healthy_vslots)} vdisks suitable for relocation', sys.stdout) |
| 121 | + common.print_if_not_quiet(args, f'Found {len(healthy_vslots)} vdisks in healthy groups', sys.stdout) |
89 | 122 | candidate_vslots = healthy_vslots
|
90 | | - else: # candidate_vslots is empty |
| 123 | + |
| 124 | + if args.storage_pool is not None: |
| 125 | + existing_storage_pools = set(group_id_to_storage_pool_name_map.values()) |
| 126 | + if args.storage_pool not in existing_storage_pools: |
| 127 | + print(f"Storage pool {args.storage_pool} not found in existing storage pools: {existing_storage_pools}") |
| 128 | + sys.exit(1) |
| 129 | + candidate_vslots = [vslot for vslot in candidate_vslots if group_id_to_storage_pool_name_map[vslot.GroupId] == args.storage_pool] |
| 130 | + common.print_if_not_quiet(args, f'Found {len(candidate_vslots)} vdisks in {args.storage_pool} storage pool', sys.stdout) |
| 131 | + |
| 132 | + if args.max_donors_per_pdisk > 0: |
| 133 | + donors_per_pdisk = common.build_donors_per_pdisk_map(base_config) |
| 134 | + candidate_vslots = [vslot for vslot in candidate_vslots if donors_per_pdisk[common.get_pdisk_id(vslot.VSlotId)] < args.max_donors_per_pdisk] |
| 135 | + common.print_if_not_quiet(args, f'Found {len(candidate_vslots)} vdisks with donors per pdisk < {args.max_donors_per_pdisk}', sys.stdout) |
| 136 | + |
| 137 | + if len(candidate_vslots) == 0: |
91 | 138 | common.print_if_not_quiet(args, 'No vdisks suitable for relocation found, waiting..', sys.stdout)
|
92 | 139 | time.sleep(10)
|
93 | 140 | continue
|
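For reference, a minimal sketch of the two candidate filters added in the hunk above, with plain dicts standing in for the maps built from base_config. The tuples, pool names and donor counts below are made up for illustration; the real code uses group_id_to_storage_pool_name_map and common.build_donors_per_pdisk_map:

from collections import defaultdict

# Each candidate vslot is reduced to (pdisk_id, group_id) for the sketch.
candidate_vslots = [(1, 10), (2, 10), (2, 11), (3, 12)]
group_id_to_pool = {10: 'pool-a', 11: 'pool-b', 12: 'pool-a'}
donors_per_pdisk = defaultdict(int, {1: 3, 3: 1})

def apply_filters(vslots, storage_pool=None, max_donors_per_pdisk=0):
    # --storage-pool: keep only vslots whose group belongs to the requested pool
    if storage_pool is not None:
        vslots = [v for v in vslots if group_id_to_pool[v[1]] == storage_pool]
    # --max-donors-per-pdisk: skip pdisks already hosting that many donors or more
    if max_donors_per_pdisk > 0:
        vslots = [v for v in vslots if donors_per_pdisk[v[0]] < max_donors_per_pdisk]
    return vslots

print(apply_filters(candidate_vslots, storage_pool='pool-a', max_donors_per_pdisk=2))
# [(2, 10), (3, 12)] -- pdisk 1 is dropped because it already has 3 donors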
@@ -182,14 +229,30 @@ def add_reassign_cmd(request, vslot):
|
182 | 229 | return True
|
183 | 230 | # end of do_reassign()
|
184 | 231 |
|
185 | | - vslots_by_pdisk_slot_usage = defaultdict(list) |
186 | | - for vslot in candidate_vslots: |
187 | | - pdisk_id = common.get_pdisk_id(vslot.VSlotId) |
188 | | - pdisk_slot_usage = pdisk_usage[pdisk_id] |
189 | | - vslots_by_pdisk_slot_usage[pdisk_slot_usage].append(vslot) |
190 | | - |
191 | | - # check vslots from pdisks with the highest slot usage first |
192 | | - for pdisk_slot_usage, vslots in sorted(vslots_by_pdisk_slot_usage.items(), reverse=True): |
| 232 | + vslots_ordered_groups_to_reassign = None |
| 233 | + if args.sort_by == 'slots': |
| 234 | + vslots_by_pdisk_slot_usage = defaultdict(list) |
| 235 | + for vslot in candidate_vslots: |
| 236 | + pdisk_id = common.get_pdisk_id(vslot.VSlotId) |
| 237 | + pdisk_slot_usage = pdisk_usage[pdisk_id] |
| 238 | + vslots_by_pdisk_slot_usage[pdisk_slot_usage].append(vslot) |
| 239 | + vslots_ordered_groups_to_reassign = [vslots for _, vslots in sorted(vslots_by_pdisk_slot_usage.items(), reverse=True)] |
| 240 | + elif args.sort_by == 'space_ratio': |
| 241 | + pdisks = { |
| 242 | + pdisk_id: { |
| 243 | + "FreeSpaceRatio": float(pdisk.PDiskMetrics.AvailableSize) / float(pdisk.PDiskMetrics.TotalSize), |
| 244 | + "CandidateVSlots": [], |
| 245 | + } |
| 246 | + for pdisk_id, pdisk in pdisk_map.items() |
| 247 | + if pdisk.PDiskMetrics.TotalSize > 0 # pdisk works |
| 248 | + } |
| 249 | + for vslot in candidate_vslots: |
| 250 | + pdisk_id = common.get_pdisk_id(vslot.VSlotId) |
| 251 | + pdisks[pdisk_id]["CandidateVSlots"].append(vslot) |
| 252 | + print({pdisk: (len(info["CandidateVSlots"]), info["FreeSpaceRatio"]) for pdisk, info in pdisks.items()}) |
| 253 | + vslots_ordered_groups_to_reassign = [info["CandidateVSlots"] for _, info in sorted(list(pdisks.items()), key=lambda x: x[1]["FreeSpaceRatio"])] |
| 254 | + |
| 255 | + for vslots in vslots_ordered_groups_to_reassign: |
193 | 256 | random.shuffle(vslots)
|
194 | 257 | for vslot in vslots:
|
195 | 258 | if do_reassign(vslot, False):
|
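And a minimal sketch of the two orderings selected by --sort-by, again with made-up stand-in data: pdisk_usage maps pdisk id to used slot count, pdisk_space to (AvailableSize, TotalSize); the real code works on the protobuf-backed maps shown above.

import random
from collections import defaultdict

candidate_vslots = [(1, 'a'), (1, 'b'), (2, 'c'), (3, 'd')]  # (pdisk_id, vdisk)
pdisk_usage = {1: 9, 2: 4, 3: 7}
pdisk_space = {1: (200, 1000), 2: (900, 1000), 3: (50, 1000)}

def order_by_slots(vslots):
    # Mirror of the sort_by == 'slots' branch: group candidates by their pdisk's
    # slot usage and visit the most populated pdisks first.
    by_usage = defaultdict(list)
    for pdisk_id, name in vslots:
        by_usage[pdisk_usage[pdisk_id]].append((pdisk_id, name))
    return [group for _, group in sorted(by_usage.items(), reverse=True)]

def order_by_space_ratio(vslots):
    # Mirror of the sort_by == 'space_ratio' branch: group candidates per pdisk
    # and visit pdisks with the lowest free-space ratio (i.e. fullest) first.
    by_pdisk = defaultdict(list)
    for pdisk_id, name in vslots:
        by_pdisk[pdisk_id].append((pdisk_id, name))
    return [group for _, group in sorted(by_pdisk.items(), key=lambda kv: pdisk_space[kv[0]][0] / pdisk_space[kv[0]][1])]

for ordering in (order_by_slots, order_by_space_ratio):
    groups = ordering(candidate_vslots)
    for group in groups:
        random.shuffle(group)  # same shuffle-within-group step as in the loop above
    print(ordering.__name__, groups)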