Skip to content

Commit bbf3ec5

Browse files
authored
Merge pull request #750 from cheney-lin/dev/round
fix(sysadvisor): fix provision logic of dedicated_cores region
2 parents ce5047c + e7355da commit bbf3ec5

File tree

16 files changed

+145
-90
lines changed

16 files changed

+145
-90
lines changed

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -828,8 +828,8 @@ func TestAdvisorUpdate(t *testing.T) {
828828
wantInternalCalculationResult: types.InternalCPUCalculationResult{
829829
PoolEntries: map[string]map[int]int{
830830
commonstate.PoolNameReserve: {-1: 2},
831-
commonstate.PoolNameShare: {-1: 84},
832-
commonstate.PoolNameReclaim: {-1: 8},
831+
commonstate.PoolNameShare: {-1: 82},
832+
commonstate.PoolNameReclaim: {-1: 10},
833833
"isolation-pod1": {-1: 2},
834834
},
835835
},
@@ -959,8 +959,8 @@ func TestAdvisorUpdate(t *testing.T) {
959959
wantInternalCalculationResult: types.InternalCPUCalculationResult{
960960
PoolEntries: map[string]map[int]int{
961961
commonstate.PoolNameReserve: {-1: 2},
962-
commonstate.PoolNameShare: {-1: 90},
963-
commonstate.PoolNameReclaim: {-1: 4},
962+
commonstate.PoolNameShare: {-1: 88},
963+
commonstate.PoolNameReclaim: {-1: 6},
964964
},
965965
},
966966
wantHeadroom: resource.Quantity{},
@@ -1074,8 +1074,8 @@ func TestAdvisorUpdate(t *testing.T) {
10741074
wantInternalCalculationResult: types.InternalCPUCalculationResult{
10751075
PoolEntries: map[string]map[int]int{
10761076
commonstate.PoolNameReserve: {-1: 2},
1077-
commonstate.PoolNameShare: {-1: 90},
1078-
commonstate.PoolNameReclaim: {-1: 4},
1077+
commonstate.PoolNameShare: {-1: 88},
1078+
commonstate.PoolNameReclaim: {-1: 6},
10791079
},
10801080
},
10811081
wantHeadroom: resource.Quantity{},

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ func (fake *FakeRegion) Type() configapi.QoSRegionType {
7474
return fake.regionType
7575
}
7676

77+
func (fake *FakeRegion) GetMetaInfo() string {
78+
return "fake"
79+
}
80+
7781
func (fake *FakeRegion) OwnerPoolName() string {
7882
return fake.ownerPoolName
7983
}

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ type ProvisionPolicy interface {
4141
Update() error
4242
// GetControlKnobAdjusted returns the latest legal control knob value
4343
GetControlKnobAdjusted() (types.ControlKnob, error)
44+
45+
GetMetaInfo() string
4446
}
4547

4648
type InitFunc func(regionName string, regionType configapi.QoSRegionType, ownerPoolName string,

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_base.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (p *PolicyBase) GetControlKnobAdjusted() (types.ControlKnob, error) {
7676
return p.controlKnobAdjusted.Clone(), nil
7777

7878
case configapi.QoSRegionTypeIsolation:
79-
return map[configapi.ControlKnobName]types.ControlKnobValue{
79+
return map[configapi.ControlKnobName]types.ControlKnobItem{
8080
configapi.ControlKnobNonReclaimedCPURequirementUpper: {
8181
Value: p.ResourceUpperBound,
8282
Action: types.ControlKnobActionNone,
@@ -91,3 +91,7 @@ func (p *PolicyBase) GetControlKnobAdjusted() (types.ControlKnob, error) {
9191
return nil, fmt.Errorf("unsupported region type %v", p.regionType)
9292
}
9393
}
94+
95+
func (p *PolicyBase) GetMetaInfo() string {
96+
return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", p.regionName, p.regionType, p.ownerPoolName, p.bindingNumas.String())
97+
}

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func (p *PolicyCanonical) Update() error {
6060
}
6161

6262
p.controlKnobAdjusted = types.ControlKnob{
63-
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobValue{
63+
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobItem{
6464
Value: cpuEstimation,
6565
Action: types.ControlKnobActionNone,
6666
},

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_none.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@ func (p *PolicyNone) Update() error
4242
func (p *PolicyNone) GetControlKnobAdjusted() (types.ControlKnob, error) {
4343
return types.InvalidControlKnob, nil
4444
}
45+
// GetMetaInfo returns the meta info string of the policy; for PolicyNone
// this is always the empty string.
func (p *PolicyNone) GetMetaInfo() string {
	return ""
}

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_rama.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,14 @@ func (p *PolicyRama) Update() error {
7777

7878
controller, ok := p.controllers[metricName]
7979
if !ok {
80-
controller = helper.NewPIDController(metricName, params)
80+
controller = helper.NewPIDController(metricName, params, p.GetMetaInfo())
8181
p.controllers[metricName] = controller
8282
}
8383

8484
controller.SetEssentials(p.ResourceEssentials)
8585
cpuAdjusted := controller.Adjust(cpuSize, indicator.Target, indicator.Current)
8686

87-
general.InfoS("[qosaware-cpu-rama] pid adjust result", "regionName", p.regionName, "metricName", metricName, "cpuAdjusted", cpuAdjusted, "last cpu size", cpuSize)
87+
general.InfoS("[qosaware-cpu-rama] pid adjust result", "meta", p.GetMetaInfo(), "metricName", metricName, "cpuAdjusted", cpuAdjusted, "last cpu size", cpuSize)
8888

8989
if cpuAdjusted > cpuAdjustedRaw {
9090
cpuAdjustedRaw = cpuAdjusted
@@ -104,12 +104,10 @@ func (p *PolicyRama) Update() error {
104104
}
105105
}
106106

107-
general.Infof("[qosaware-cpu-rama] ReclaimOverlap=%v, region=%v", p.ControlEssentials.ReclaimOverlap, p.regionName)
108-
109107
cpuAdjustedRestricted := cpuAdjustedRaw
110108

111109
p.controlKnobAdjusted = types.ControlKnob{
112-
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobValue{
110+
configapi.ControlKnobNonReclaimedCPURequirement: types.ControlKnobItem{
113111
Value: cpuAdjustedRestricted,
114112
Action: types.ControlKnobActionNone,
115113
},

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ type QoSRegion interface {
8585
GetStatus() types.RegionStatus
8686
// GetControlEssentials returns the latest control essentials
8787
GetControlEssentials() types.ControlEssentials
88+
89+
GetMetaInfo() string
8890
}
8991

9092
// GetRegionBasicMetricTags returns metric tag slice of region info and status

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,15 @@ type internalHeadroomPolicy struct {
7979
}
8080

8181
type provisionPolicyResult struct {
82+
msg string
8283
essentials types.ResourceEssentials
8384
regulatorOptions regulator.RegulatorOptions
8485
controlKnobValueRegulators map[v1alpha1.ControlKnobName]regulator.Regulator
8586
}
8687

87-
func newProvisionPolicyResult(essentials types.ResourceEssentials, regulatorOptions regulator.RegulatorOptions) *provisionPolicyResult {
88+
func newProvisionPolicyResult(essentials types.ResourceEssentials, regulatorOptions regulator.RegulatorOptions, msg string) *provisionPolicyResult {
8889
return &provisionPolicyResult{
90+
msg: msg,
8991
essentials: essentials,
9092
regulatorOptions: regulatorOptions,
9193
controlKnobValueRegulators: make(map[v1alpha1.ControlKnobName]regulator.Regulator),
@@ -102,26 +104,20 @@ func (r *provisionPolicyResult) setEssentials(essentials types.ResourceEssential
102104

103105
// regulateControlKnob regulates each current control knob, passing the matching effective control knob item (or nil if absent) to its regulator
104106
// todo: currently only the control knob value is regulated; the action will also be regulated in the future
105-
func (r *provisionPolicyResult) regulateControlKnob(currentControlKnob types.ControlKnob, lastControlKnob *types.ControlKnob) {
106-
if lastControlKnob != nil {
107-
for name, knob := range *lastControlKnob {
108-
reg, ok := r.controlKnobValueRegulators[name]
109-
if !ok || reg == nil {
110-
reg = r.newRegulator(name)
111-
}
112-
113-
reg.SetLatestControlKnobValue(knob)
114-
r.controlKnobValueRegulators[name] = reg
115-
}
116-
}
117-
107+
func (r *provisionPolicyResult) regulateControlKnob(currentControlKnob, effectiveControlKnob types.ControlKnob) {
108+
klog.InfoS("[provisionPolicyResult]", "region", r.msg,
109+
"currentControlKnob", currentControlKnob, "effectiveControlKnob", effectiveControlKnob)
118110
for name, knob := range currentControlKnob {
119111
reg, ok := r.controlKnobValueRegulators[name]
120112
if !ok || reg == nil {
121113
reg = r.newRegulator(name)
122114
}
123-
124-
reg.Regulate(knob)
115+
effectiveKnobItem, ok := effectiveControlKnob[name]
116+
if ok {
117+
reg.Regulate(knob, &effectiveKnobItem)
118+
} else {
119+
reg.Regulate(knob, nil)
120+
}
125121
r.controlKnobValueRegulators[name] = reg
126122
}
127123
}
@@ -140,9 +136,9 @@ func (r *provisionPolicyResult) newRegulator(name v1alpha1.ControlKnobName) regu
140136
// getControlKnob is to get final control knob from regulators
141137
func (r *provisionPolicyResult) getControlKnob() types.ControlKnob {
142138
controlKnob := make(types.ControlKnob)
143-
for name, r := range r.controlKnobValueRegulators {
144-
controlKnob[name] = types.ControlKnobValue{
145-
Value: float64(r.GetRequirement()),
139+
for name, regulator := range r.controlKnobValueRegulators {
140+
controlKnob[name] = types.ControlKnobItem{
141+
Value: float64(regulator.GetRequirement()),
146142
Action: types.ControlKnobActionNone,
147143
}
148144
}
@@ -230,6 +226,11 @@ func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoS
230226
MaxRampUpStep: conf.MaxRampUpStep,
231227
MaxRampDownStep: conf.MaxRampDownStep,
232228
MinRampDownPeriod: conf.MinRampDownPeriod,
229+
NeedHTAligned: func() bool {
230+
return machine.SmtActive() &&
231+
!conf.GetDynamicConfiguration().AllowSharedCoresOverlapReclaimedCores &&
232+
regionType == v1alpha1.QoSRegionTypeShare
233+
},
233234
},
234235

235236
metaReader: metaReader,
@@ -289,6 +290,16 @@ func (r *QoSRegionBase) Clear() {
289290
r.containerTopologyAwareAssignment = make(types.TopologyAwareAssignment)
290291
}
291292

293+
func (r *QoSRegionBase) GetMetaInfo() string {
294+
r.Lock()
295+
defer r.Unlock()
296+
return r.getMetaInfo()
297+
}
298+
299+
func (r *QoSRegionBase) getMetaInfo() string {
300+
return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", r.name, r.regionType, r.ownerPoolName, r.bindingNumas.String())
301+
}
302+
292303
func (r *QoSRegionBase) GetBindingNumas() machine.CPUSet {
293304
r.Lock()
294305
defer r.Unlock()
@@ -626,7 +637,7 @@ func (r *QoSRegionBase) getProvisionControlKnob() map[types.CPUProvisionPolicyNa
626637
{Key: metricTagKeyControlKnobAction, Val: string(value.Action)},
627638
}...)
628639

629-
klog.InfoS("[qosaware-cpu] get raw control knob", "region", r.name, "policy", internal.name,
640+
klog.InfoS("[qosaware-cpu] get raw control knob", "meta", r.getMetaInfo(), "policy", internal.name,
630641
"knob", name, "action", value.Action, "value", value.Value)
631642
}
632643
}
@@ -636,7 +647,7 @@ func (r *QoSRegionBase) getProvisionControlKnob() map[types.CPUProvisionPolicyNa
636647

637648
// regulateProvisionControlKnob regulates the provision control knob for each provision policy
638649
func (r *QoSRegionBase) regulateProvisionControlKnob(originControlKnob map[types.CPUProvisionPolicyName]types.ControlKnob,
639-
lastControlKnob *types.ControlKnob,
650+
effectiveControlKnob types.ControlKnob,
640651
) {
641652
provisionPolicyResults := make(map[types.CPUProvisionPolicyName]*provisionPolicyResult)
642653
firstValidPolicy := types.CPUProvisionPolicyNone
@@ -660,13 +671,13 @@ func (r *QoSRegionBase) regulateProvisionControlKnob(originControlKnob map[types
660671

661672
policyResult, ok := r.provisionPolicyResults[internal.name]
662673
if !ok || policyResult == nil {
663-
policyResult = newProvisionPolicyResult(r.ResourceEssentials, r.cpuRegulatorOptions)
664-
policyResult.regulateControlKnob(controlKnob, lastControlKnob)
674+
policyResult = newProvisionPolicyResult(r.ResourceEssentials, r.cpuRegulatorOptions, r.getMetaInfo())
675+
policyResult.regulateControlKnob(controlKnob, effectiveControlKnob)
665676
} else {
666677
policyResult.setEssentials(r.ResourceEssentials)
667678
// only pass the effective control knob to the regulators of the first valid policy; other policies get nil
668679
if internal.name == firstValidPolicy {
669-
policyResult.regulateControlKnob(controlKnob, lastControlKnob)
680+
policyResult.regulateControlKnob(controlKnob, effectiveControlKnob)
670681
} else {
671682
policyResult.regulateControlKnob(controlKnob, nil)
672683
}
@@ -685,8 +696,8 @@ func (r *QoSRegionBase) regulateProvisionControlKnob(originControlKnob map[types
685696
{Key: metricTagKeyControlKnobName, Val: string(knob)},
686697
{Key: metricTagKeyControlKnobAction, Val: string(value.Action)},
687698
}...)
688-
klog.InfoS("[qosaware-cpu] get regulated control knob", "region", r.name, "policy", policy, "knob", knob,
689-
"action", value.Action, "value", value.Value)
699+
klog.InfoS("[qosaware-cpu] get regulated control knob", "region", r.name, "bindingNumas", r.bindingNumas.String(),
700+
"policy", policy, "knob", knob, "action", value.Action, "value", value.Value)
690701
}
691702
}
692703
}

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated_numa_exclusive.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,12 @@ func (r *QoSRegionDedicatedNumaExclusive) TryUpdateProvision() {
110110
rawControlKnobs := r.getProvisionControlKnob()
111111

112112
// regulate control knobs
113-
r.regulateProvisionControlKnob(rawControlKnobs, &r.ControlKnobs)
113+
r.regulateProvisionControlKnob(rawControlKnobs, r.getEffectiveControlKnobs())
114114
}
115115

116116
func (r *QoSRegionDedicatedNumaExclusive) updateProvisionPolicy() {
117117
r.ControlEssentials = types.ControlEssentials{
118-
ControlKnobs: r.getControlKnobs(),
118+
ControlKnobs: r.getEffectiveControlKnobs(),
119119
ReclaimOverlap: true,
120120
}
121121

@@ -174,7 +174,7 @@ out:
174174
r.idle.Store(idle)
175175
}
176176

177-
func (r *QoSRegionDedicatedNumaExclusive) getControlKnobs() types.ControlKnob {
177+
func (r *QoSRegionDedicatedNumaExclusive) getEffectiveControlKnobs() types.ControlKnob {
178178
reclaimedCPUSize := 0
179179
if reclaimedInfo, ok := r.metaReader.GetPoolInfo(commonstate.PoolNameReclaim); ok {
180180
for _, numaID := range r.bindingNumas.ToSliceInt() {

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,12 @@ func (r *QoSRegionShare) TryUpdateProvision() {
9494
restrictedControlKnobs := r.restrictProvisionControlKnob(rawControlKnobs)
9595

9696
// regulate control knobs
97-
r.regulateProvisionControlKnob(restrictedControlKnobs, &r.ControlKnobs)
97+
r.regulateProvisionControlKnob(restrictedControlKnobs, r.getEffectiveControlKnobs())
9898
}
9999

100100
func (r *QoSRegionShare) updateProvisionPolicy() {
101101
r.ControlEssentials = types.ControlEssentials{
102-
ControlKnobs: r.getControlKnobs(),
102+
ControlKnobs: r.getEffectiveControlKnobs(),
103103
ReclaimOverlap: r.AllowSharedCoresOverlapReclaimedCores,
104104
}
105105

@@ -197,7 +197,7 @@ func (r *QoSRegionShare) restrictProvisionControlKnob(originControlKnob map[type
197197
return restrictedControlKnob
198198
}
199199

200-
func (r *QoSRegionShare) getControlKnobs() types.ControlKnob {
200+
func (r *QoSRegionShare) getEffectiveControlKnobs() types.ControlKnob {
201201
regionInfo, ok := r.metaReader.GetRegionInfo(r.name)
202202
if ok {
203203
if _, existed := regionInfo.ControlKnobMap[configapi.ControlKnobNonReclaimedCPURequirement]; existed {

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ func TestRestrictProvisionControlKnob(t *testing.T) {
220220
MaxLowerGap: pointer.Float64(1),
221221
},
222222
},
223-
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 8}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
224-
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 9}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
223+
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 8}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
224+
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 9}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
225225
},
226226
{
227227
name: "upper ref",
@@ -231,8 +231,8 @@ func TestRestrictProvisionControlKnob(t *testing.T) {
231231
MaxLowerGap: pointer.Float64(1),
232232
},
233233
},
234-
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 16}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
235-
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobValue{Value: 14}}, "p2": {"c1": types.ControlKnobValue{Value: 10}}},
234+
originControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 16}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
235+
wantControlKnob: map[types.CPUProvisionPolicyName]types.ControlKnob{"p1": {"c1": types.ControlKnobItem{Value: 14}}, "p2": {"c1": types.ControlKnobItem{Value: 10}}},
236236
},
237237
}
238238

0 commit comments

Comments
 (0)