@@ -41,6 +41,8 @@ import (
41
41
qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos"
42
42
)
43
43
44
+ var errNoAvailableCPUHints = fmt .Errorf ("no available cpu hints" )
45
+
44
46
type memBWHintUpdate struct {
45
47
updatedPreferrence bool
46
48
leftAllocatable int
@@ -73,7 +75,7 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context,
73
75
"podName" : req .PodName ,
74
76
"containerName" : req .ContainerName ,
75
77
})... )
76
- return nil , fmt . Errorf ( "no enough cpu resource" )
78
+ return nil , errNoAvailableCPUHints
77
79
}
78
80
}
79
81
@@ -209,12 +211,6 @@ func (p *DynamicPolicy) calculateHints(reqInt int,
209
211
}
210
212
sort .Ints (numaNodes )
211
213
212
- hints := map [string ]* pluginapi.ListOfTopologyHints {
213
- string (v1 .ResourceCPU ): {
214
- Hints : []* pluginapi.TopologyHint {},
215
- },
216
- }
217
-
218
214
minNUMAsCountNeeded , _ , err := util .GetNUMANodesCountToFitCPUReq (reqInt , p .machineInfo .CPUTopology )
219
215
if err != nil {
220
216
return nil , fmt .Errorf ("GetNUMANodesCountToFitCPUReq failed with error: %v" , err )
@@ -262,6 +258,7 @@ func (p *DynamicPolicy) calculateHints(reqInt int,
262
258
}
263
259
264
260
preferredHintIndexes := []int {}
261
+ var availableNumaHints []* pluginapi.TopologyHint
265
262
machine .IterateBitMasks (numaNodes , numaBound , func (mask machine.BitMask ) {
266
263
maskCount := mask .Count ()
267
264
if maskCount < minNUMAsCountNeeded {
@@ -292,18 +289,27 @@ func (p *DynamicPolicy) calculateHints(reqInt int,
292
289
}
293
290
294
291
preferred := maskCount == minNUMAsCountNeeded
295
- hints [ string ( v1 . ResourceCPU )]. Hints = append (hints [ string ( v1 . ResourceCPU )]. Hints , & pluginapi.TopologyHint {
292
+ availableNumaHints = append (availableNumaHints , & pluginapi.TopologyHint {
296
293
Nodes : machine .MaskToUInt64Array (mask ),
297
294
Preferred : preferred ,
298
295
})
299
296
300
297
if preferred {
301
- preferredHintIndexes = append (preferredHintIndexes , len (hints [ string ( v1 . ResourceCPU )]. Hints )- 1 )
298
+ preferredHintIndexes = append (preferredHintIndexes , len (availableNumaHints )- 1 )
302
299
}
303
300
})
304
301
302
+ // NOTE: because gRPC is unable to distinguish between an empty array and nil,
303
+ // we return an error instead of an empty array.
304
+ // we should resolve this issue if we need to manage multiple resources in one plugin.
305
+ if len (availableNumaHints ) == 0 {
306
+ general .Warningf ("calculateHints got no available cpu hints for pod: %s/%s, container: %s" ,
307
+ req .PodNamespace , req .PodName , req .ContainerName )
308
+ return nil , errNoAvailableCPUHints
309
+ }
310
+
305
311
if numaBound > machine .MBWNUMAsPoint {
306
- numaAllocatedMemBW , err := getNUMAAllocatedMemBW (machineState , p .metaServer )
312
+ numaAllocatedMemBW , err := getNUMAAllocatedMemBW (machineState , p .metaServer , p . getContainerRequestedCores )
307
313
308
314
general .InfoS ("getNUMAAllocatedMemBW" ,
309
315
"podNamespace" , req .PodNamespace ,
@@ -314,15 +320,21 @@ func (p *DynamicPolicy) calculateHints(reqInt int,
314
320
general .Errorf ("getNUMAAllocatedMemBW failed with error: %v" , err )
315
321
_ = p .emitter .StoreInt64 (util .MetricNameGetNUMAAllocatedMemBWFailed , 1 , metrics .MetricTypeNameRaw )
316
322
} else {
317
- p .updatePreferredCPUHintsByMemBW (preferredHintIndexes , hints [ string ( v1 . ResourceCPU )]. Hints ,
323
+ p .updatePreferredCPUHintsByMemBW (preferredHintIndexes , availableNumaHints ,
318
324
reqInt , numaAllocatedMemBW , req , numaExclusive )
319
325
}
320
326
}
321
327
328
+ hints := map [string ]* pluginapi.ListOfTopologyHints {
329
+ string (v1 .ResourceCPU ): {
330
+ Hints : availableNumaHints ,
331
+ },
332
+ }
333
+
322
334
return hints , nil
323
335
}
324
336
325
- func getNUMAAllocatedMemBW (machineState state.NUMANodeMap , metaServer * metaserver.MetaServer ) (map [int ]int , error ) {
337
+ func getNUMAAllocatedMemBW (machineState state.NUMANodeMap , metaServer * metaserver.MetaServer , getContainerRequestedCores state. GetContainerRequestedCoresFunc ) (map [int ]int , error ) {
326
338
numaAllocatedMemBW := make (map [int ]int )
327
339
podUIDToMemBWReq := make (map [string ]int )
328
340
podUIDToBindingNUMAs := make (map [string ]sets.Int )
@@ -350,7 +362,7 @@ func getNUMAAllocatedMemBW(machineState state.NUMANodeMap, metaServer *metaserve
350
362
Name : allocationInfo .PodName ,
351
363
Labels : allocationInfo .Labels ,
352
364
Annotations : allocationInfo .Annotations ,
353
- }, int (math .Ceil (state . GetContainerRequestedCores () (allocationInfo ))))
365
+ }, int (math .Ceil (getContainerRequestedCores (allocationInfo ))))
354
366
if err != nil {
355
367
return nil , fmt .Errorf ("GetContainerMemoryBandwidthRequest for pod: %s/%s, container: %s failed with error: %v" ,
356
368
allocationInfo .PodNamespace , allocationInfo .PodName , allocationInfo .ContainerName , err )
@@ -633,7 +645,7 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context,
633
645
general .Infof ("pod: %s/%s, container: %s request inplace update resize and no enough resource in current NUMA, try to migrate it to new NUMA" ,
634
646
req .PodNamespace , req .PodName , req .ContainerName )
635
647
var calculateErr error
636
- hints , calculateErr = p .calculateHintsForNUMABindingSharedCores (reqInt , podEntries , machineState , req . Annotations )
648
+ hints , calculateErr = p .calculateHintsForNUMABindingSharedCores (reqInt , podEntries , machineState , req )
637
649
if calculateErr != nil {
638
650
general .Errorf ("pod: %s/%s, container: %s request inplace update resize and no enough resource in current NUMA, failed to migrate it to new NUMA" ,
639
651
req .PodNamespace , req .PodName , req .ContainerName )
@@ -642,15 +654,15 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context,
642
654
} else {
643
655
general .Errorf ("pod: %s/%s, container: %s request inplace update resize, but no enough resource for it in current NUMA" ,
644
656
req .PodNamespace , req .PodName , req .ContainerName )
645
- return nil , fmt . Errorf ( "inplace update resize scale out failed with no enough resource" )
657
+ return nil , errNoAvailableCPUHints
646
658
}
647
659
} else {
648
660
general .Infof ("pod: %s/%s, container: %s request inplace update resize, there is enough resource for it in current NUMA" ,
649
661
req .PodNamespace , req .PodName , req .ContainerName )
650
662
}
651
663
} else if hints == nil {
652
664
var calculateErr error
653
- hints , calculateErr = p .calculateHintsForNUMABindingSharedCores (reqInt , podEntries , machineState , req . Annotations )
665
+ hints , calculateErr = p .calculateHintsForNUMABindingSharedCores (reqInt , podEntries , machineState , req )
654
666
if calculateErr != nil {
655
667
return nil , fmt .Errorf ("calculateHintsForNUMABindingSharedCores failed with error: %v" , calculateErr )
656
668
}
@@ -780,12 +792,13 @@ func (p *DynamicPolicy) filterNUMANodesByNonBindingSharedRequestedQuantity(nonBi
780
792
781
793
func (p * DynamicPolicy ) calculateHintsForNUMABindingSharedCores (reqInt int , podEntries state.PodEntries ,
782
794
machineState state.NUMANodeMap ,
783
- reqAnnotations map [ string ] string ,
795
+ req * pluginapi. ResourceRequest ,
784
796
) (map [string ]* pluginapi.ListOfTopologyHints , error ) {
785
797
nonBindingNUMAsCPUQuantity := machineState .GetFilteredAvailableCPUSet (p .reservedCPUs , nil , state .CheckNUMABinding ).Size ()
786
798
nonBindingNUMAs := machineState .GetFilteredNUMASet (state .CheckNUMABinding )
787
- nonBindingSharedRequestedQuantity := state .GetNonBindingSharedRequestedQuantityFromPodEntries (podEntries )
799
+ nonBindingSharedRequestedQuantity := state .GetNonBindingSharedRequestedQuantityFromPodEntries (podEntries , nil , p . getContainerRequestedCores )
788
800
801
+ reqAnnotations := req .Annotations
789
802
numaNodes := p .filterNUMANodesByNonBindingSharedRequestedQuantity (nonBindingSharedRequestedQuantity ,
790
803
nonBindingNUMAsCPUQuantity , nonBindingNUMAs , machineState ,
791
804
machineState .GetFilteredNUMASetWithAnnotations (state .CheckNUMABindingSharedCoresAntiAffinity , reqAnnotations ).ToSliceInt ())
@@ -826,6 +839,15 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(reqInt int, podE
826
839
p .populateHintsByPreferPolicy (numaNodes , cpuconsts .CPUNUMAHintPreferPolicySpreading , hints , machineState , reqInt )
827
840
}
828
841
842
+ // NOTE: because gRPC is unable to distinguish between an empty array and nil,
843
+ // we return an error instead of an empty array.
844
+ // we should resolve this issue if we need to manage multiple resources in one plugin.
845
+ if len (hints [string (v1 .ResourceCPU )].Hints ) == 0 {
846
+ general .Warningf ("calculateHints got no available memory hints for snb pod: %s/%s, container: %s" ,
847
+ req .PodNamespace , req .PodName , req .ContainerName )
848
+ return nil , errNoAvailableCPUHints
849
+ }
850
+
829
851
return hints , nil
830
852
}
831
853
0 commit comments