@@ -36,9 +36,12 @@ import (
36
36
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware/reporter/manager/resource"
37
37
hmadvisor "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware/resource"
38
38
"github.com/kubewharf/katalyst-core/pkg/config"
39
+ "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
39
40
"github.com/kubewharf/katalyst-core/pkg/metaserver"
40
41
"github.com/kubewharf/katalyst-core/pkg/metrics"
41
42
"github.com/kubewharf/katalyst-core/pkg/util"
43
+ "github.com/kubewharf/katalyst-core/pkg/util/general"
44
+ "github.com/kubewharf/katalyst-core/pkg/util/native"
42
45
)
43
46
44
47
func init () {
@@ -50,6 +53,10 @@ const (
50
53
headroomReporterPluginName = "headroom-reporter-plugin"
51
54
)
52
55
56
// metricsNameReclaimedResourceRevised is the name of the metric stored by
// reviseReclaimedResource whenever a reclaimed allocatable list was reset.
const metricsNameReclaimedResourceRevised = "reclaimed_resource_revised"
59
+
53
60
type HeadroomResourceManager interface {
54
61
manager.ResourceManager
55
62
manager.NumaResourceManager
@@ -111,16 +118,21 @@ type reclaimedResource struct {
111
118
capacity v1.ResourceList
112
119
numaAllocatable map [int ]v1.ResourceList
113
120
numaCapacity map [int ]v1.ResourceList
121
+
122
+ resourceNameMap map [v1.ResourceName ]v1.ResourceName
123
+ milliValue map [v1.ResourceName ]bool
114
124
}
115
125
116
126
type headroomReporterPlugin struct {
117
127
sync.Mutex
118
128
headroomManagers map [v1.ResourceName ]manager.HeadroomManager
119
129
numaSocketZoneNodeMap map [util.ZoneNode ]util.ZoneNode
120
130
121
- ctx context.Context
122
- cancel context.CancelFunc
123
- started bool
131
+ dynamicConf * dynamic.DynamicAgentConfiguration
132
+ ctx context.Context
133
+ cancel context.CancelFunc
134
+ emitter metrics.MetricEmitter
135
+ started bool
124
136
}
125
137
126
138
func newHeadroomReporterPlugin (emitter metrics.MetricEmitter , metaServer * metaserver.MetaServer ,
@@ -131,6 +143,11 @@ func newHeadroomReporterPlugin(emitter metrics.MetricEmitter, metaServer *metase
131
143
errList []error
132
144
)
133
145
146
+ // init numa topo info by metaServer
147
+ if metaServer == nil || metaServer .MachineInfo == nil {
148
+ return nil , nil , fmt .Errorf ("get metaserver machine info is nil" )
149
+ }
150
+
134
151
initializers := manager .GetRegisteredManagerInitializers ()
135
152
headroomManagers := make (map [v1.ResourceName ]manager.HeadroomManager , len (initializers ))
136
153
for name , initializer := range initializers {
@@ -140,18 +157,15 @@ func newHeadroomReporterPlugin(emitter metrics.MetricEmitter, metaServer *metase
140
157
}
141
158
}
142
159
143
- // init numa topo info by metaServer
144
- if metaServer == nil || metaServer .MachineInfo == nil {
145
- errList = append (errList , fmt .Errorf ("get metaserver machine info is nil" ))
146
- }
147
-
148
160
if len (errList ) > 0 {
149
161
return nil , nil , errors .NewAggregate (errList )
150
162
}
151
163
152
164
reporter := & headroomReporterPlugin {
153
165
headroomManagers : headroomManagers ,
154
166
numaSocketZoneNodeMap : util .GenerateNumaSocketZone (metaServer .MachineInfo .Topology ),
167
+ dynamicConf : conf .DynamicAgentConfiguration ,
168
+ emitter : emitter ,
155
169
}
156
170
pluginWrapper , err := skeleton .NewRegistrationPluginWrapper (reporter , []string {conf .PluginRegistrationDir },
157
171
func (key string , value int64 ) {
@@ -223,6 +237,12 @@ func (r *headroomReporterPlugin) GetReportContent(_ context.Context, _ *v1alpha1
223
237
return nil , err
224
238
}
225
239
240
+ // revise reclaimed resource to avoid resource fragmentation
241
+ err = r .reviseReclaimedResource (res )
242
+ if err != nil {
243
+ return nil , err
244
+ }
245
+
226
246
reportToCNR , err := r .getReportReclaimedResourceForCNR (res )
227
247
if err != nil {
228
248
return nil , err
@@ -256,47 +276,51 @@ func (r *headroomReporterPlugin) getReclaimedResource() (*reclaimedResource, err
256
276
capacity := make (v1.ResourceList )
257
277
numaAllocatable := make (map [int ]v1.ResourceList )
258
278
numaCapacity := make (map [int ]v1.ResourceList )
259
- for resourceName , rm := range r .headroomManagers {
260
- allocatable [resourceName ], err = rm .GetAllocatable ()
279
+ resourceNameMap := make (map [v1.ResourceName ]v1.ResourceName )
280
+ milliValue := make (map [v1.ResourceName ]bool )
281
+ for reportName , rm := range r .headroomManagers {
282
+ // get origin resource name
283
+ resourceNameMap [reportName ] = rm .Name ()
284
+ milliValue [reportName ] = rm .MilliValue ()
285
+ allocatable [reportName ], err = rm .GetAllocatable ()
261
286
if err != nil {
262
- errList = append (errList , fmt .Errorf ("get reclaimed %s allocatable failed: %s" , resourceName , err ))
287
+ errList = append (errList , fmt .Errorf ("get reclaimed %s allocatable failed: %s" , reportName , err ))
263
288
}
264
289
265
- capacity [resourceName ], err = rm .GetCapacity ()
290
+ capacity [reportName ], err = rm .GetCapacity ()
266
291
if err != nil {
267
- errList = append (errList , err , fmt .Errorf ("get reclaimed %s capacity failed: %s" , resourceName , err ))
292
+ errList = append (errList , err , fmt .Errorf ("get reclaimed %s capacity failed: %s" , reportName , err ))
268
293
}
269
294
270
295
// get allocatable per numa
271
296
allocatableMap , err := rm .GetNumaAllocatable ()
272
297
if err != nil {
273
- errList = append (errList , fmt .Errorf ("get reclaimed %s numa allocatable failed: %s" , resourceName , err ))
298
+ errList = append (errList , fmt .Errorf ("get reclaimed %s numa allocatable failed: %s" , reportName , err ))
274
299
} else {
275
300
for numaID , quantity := range allocatableMap {
276
301
perNumaAllocatable , ok := numaAllocatable [numaID ]
277
302
if ! ok {
278
303
perNumaAllocatable = make (v1.ResourceList )
279
304
numaAllocatable [numaID ] = perNumaAllocatable
280
305
}
281
- perNumaAllocatable [resourceName ] = quantity
306
+ perNumaAllocatable [reportName ] = quantity
282
307
}
283
308
}
284
309
285
310
// get capacity per numa
286
311
capacityMap , err := rm .GetNumaCapacity ()
287
312
if err != nil {
288
- errList = append (errList , fmt .Errorf ("get reclaimed %s numa capacity failed: %s" , resourceName , err ))
313
+ errList = append (errList , fmt .Errorf ("get reclaimed %s numa capacity failed: %s" , reportName , err ))
289
314
} else {
290
315
for numaID , quantity := range capacityMap {
291
316
perNumaCapacity , ok := numaCapacity [numaID ]
292
317
if ! ok {
293
318
perNumaCapacity = make (v1.ResourceList )
294
319
numaCapacity [numaID ] = perNumaCapacity
295
320
}
296
- perNumaCapacity [resourceName ] = quantity
321
+ perNumaCapacity [reportName ] = quantity
297
322
}
298
323
}
299
-
300
324
}
301
325
302
326
if len (errList ) > 0 {
@@ -308,7 +332,9 @@ func (r *headroomReporterPlugin) getReclaimedResource() (*reclaimedResource, err
308
332
capacity : capacity ,
309
333
numaAllocatable : numaAllocatable ,
310
334
numaCapacity : numaCapacity ,
311
- }, err
335
+ milliValue : milliValue ,
336
+ resourceNameMap : resourceNameMap ,
337
+ }, nil
312
338
}
313
339
314
340
func (r * headroomReporterPlugin ) getReportReclaimedResourceForCNR (reclaimedResource * reclaimedResource ) (* v1alpha1.ReportContent , error ) {
@@ -385,3 +411,67 @@ func (r *headroomReporterPlugin) getReportNUMAReclaimedResource(reclaimedResourc
385
411
Value : value ,
386
412
}, nil
387
413
}
414
+
415
+ func (r * headroomReporterPlugin ) reviseReclaimedResource (res * reclaimedResource ) error {
416
+ if res == nil {
417
+ return fmt .Errorf ("reclaimed resource is nil" )
418
+ }
419
+
420
+ conf := r .dynamicConf .GetDynamicConfiguration ()
421
+ reviseFunc := func (resList v1.ResourceList ) bool {
422
+ revise := false
423
+ for reportName , quantity := range resList {
424
+ resourceName , ok := res .resourceNameMap [reportName ]
425
+ if ! ok {
426
+ resourceName = reportName
427
+ }
428
+
429
+ minIgnored , ok := conf .MinIgnoredReclaimedResourceForReport [resourceName ]
430
+ if ok {
431
+ milliValue , ok := res .milliValue [reportName ]
432
+ if ok && milliValue {
433
+ minIgnored = * apiresource .NewQuantity (minIgnored .MilliValue (), minIgnored .Format )
434
+ }
435
+
436
+ if quantity .Cmp (minIgnored ) <= 0 {
437
+ revise = true
438
+ break
439
+ }
440
+ }
441
+ }
442
+
443
+ if revise {
444
+ for resourceName := range resList {
445
+ resList [resourceName ] = apiresource.Quantity {}
446
+ }
447
+ }
448
+
449
+ return revise
450
+ }
451
+
452
+ numaRevised := false
453
+ for numaID := range res .numaAllocatable {
454
+ if reviseFunc (res .numaAllocatable [numaID ]) {
455
+ numaRevised = true
456
+ }
457
+ }
458
+
459
+ if numaRevised {
460
+ sumNUMAAllocatable := v1.ResourceList {}
461
+ for _ , allocatable := range res .numaAllocatable {
462
+ sumNUMAAllocatable = native .AddResources (sumNUMAAllocatable , allocatable )
463
+ }
464
+ res .allocatable = sumNUMAAllocatable
465
+ }
466
+
467
+ revised := reviseFunc (res .allocatable )
468
+ if numaRevised || revised {
469
+ general .InfoS ("revised result" ,
470
+ "allocatable" , res .allocatable ,
471
+ "capacity" , res .capacity ,
472
+ "numaAllocatable" , res .numaAllocatable ,
473
+ "numaCapacity" , res .numaCapacity )
474
+ _ = r .emitter .StoreInt64 (metricsNameReclaimedResourceRevised , 1 , metrics .MetricTypeNameRaw )
475
+ }
476
+ return nil
477
+ }
0 commit comments