Skip to content

Commit 782d443

Browse files
authored
Merge pull request #644 from cheney-lin/dev/fix-overlap
fix(advisor): fix reclaim overlap share cores
2 parents 8fd9290 + 46ed456 commit 782d443

File tree

4 files changed

+155
-20
lines changed

4 files changed

+155
-20
lines changed

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package provisionassembler
1919
import (
2020
"context"
2121
"fmt"
22-
"math"
2322
"time"
2423

2524
"k8s.io/klog/v2"
@@ -144,7 +143,7 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul
144143
calculationResult.SetPoolEntry(poolName, regionNuma, size)
145144
}
146145

147-
reclaimedCoresSize := available - general.SumUpMapValues(poolSizes) + reservedForReclaim
146+
var reclaimedCoresSize int
148147
if *pa.allowSharedCoresOverlapReclaimedCores {
149148
reclaimedCoresAvail := poolSizes[r.OwnerPoolName()] - poolRequirements[r.OwnerPoolName()]
150149
if !nodeEnableReclaim {
@@ -153,6 +152,11 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul
153152
reclaimedCoresSize = general.Max(pa.getNumasReservedForReclaim(r.GetBindingNumas()), reclaimedCoresAvail)
154153
reclaimedCoresSize = general.Min(reclaimedCoresSize, poolSizes[r.OwnerPoolName()])
155154
calculationResult.SetPoolOverlapInfo(state.PoolNameReclaim, regionNuma, r.OwnerPoolName(), reclaimedCoresSize)
155+
} else {
156+
reclaimedCoresSize = available - general.SumUpMapValues(poolSizes) + reservedForReclaim
157+
if !nodeEnableReclaim {
158+
reclaimedCoresSize = reservedForReclaim
159+
}
156160
}
157161

158162
calculationResult.SetPoolEntry(state.PoolNameReclaim, regionNuma, reclaimedCoresSize)
@@ -236,7 +240,7 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul
236240
calculationResult.SetPoolEntry(poolName, state.FakedNUMAID, poolSize)
237241
}
238242

239-
reclaimPoolSizeOfNonBindingNUMAs := shareAndIsolatedPoolAvailable - general.SumUpMapValues(shareAndIsolatePoolSizes) + pa.getNumasReservedForReclaim(*pa.nonBindingNumas)
243+
var reclaimPoolSizeOfNonBindingNUMAs int
240244
if *pa.allowSharedCoresOverlapReclaimedCores {
241245
isolated := shareAndIsolatedPoolAvailable
242246
sharePoolSizes := make(map[string]int)
@@ -251,29 +255,20 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul
251255

252256
if len(sharePoolSizes) > 0 {
253257
reclaimPoolSizeOfNonBindingNUMAs = general.Min(reclaimPoolSizeOfNonBindingNUMAs, general.SumUpMapValues(sharePoolSizes))
254-
sharedOverlapReclaimSize := make(map[string]int) // sharedPoolName -> reclaimedSize
255-
sharePoolSum := general.SumUpMapValues(sharePoolSizes)
256-
ps := general.SortedByValue(sharePoolSizes)
257-
for _, p := range ps {
258-
sharePoolSize := p.Value
259-
sharePoolName := p.Key
260-
size := int(math.Floor(float64(reclaimPoolSizeOfNonBindingNUMAs*sharePoolSize) / float64(sharePoolSum)))
261-
if size > 0 {
262-
sharedOverlapReclaimSize[sharePoolName] = size
263-
}
264-
}
265-
266-
diff := reclaimPoolSizeOfNonBindingNUMAs - general.SumUpMapValues(sharedOverlapReclaimSize)
267-
if diff > 0 {
268-
sharedOverlapReclaimSize[ps[0].Key] += diff
269-
} else if diff < 0 {
258+
sharedOverlapReclaimSize, err := regulateOverlapReclaimPoolSize(sharePoolSizes, reclaimPoolSizeOfNonBindingNUMAs)
259+
if err != nil {
270260
return types.InternalCPUCalculationResult{}, fmt.Errorf("failed to calculate sharedOverlapReclaimSize")
271261
}
272262

273263
for overlapPoolName, size := range sharedOverlapReclaimSize {
274264
calculationResult.SetPoolOverlapInfo(state.PoolNameReclaim, state.FakedNUMAID, overlapPoolName, size)
275265
}
276266
}
267+
} else {
268+
reclaimPoolSizeOfNonBindingNUMAs = shareAndIsolatedPoolAvailable - general.SumUpMapValues(shareAndIsolatePoolSizes) + pa.getNumasReservedForReclaim(*pa.nonBindingNumas)
269+
if !nodeEnableReclaim {
270+
reclaimPoolSizeOfNonBindingNUMAs = pa.getNumasReservedForReclaim(*pa.nonBindingNumas)
271+
}
277272
}
278273

279274
calculationResult.SetPoolEntry(state.PoolNameReclaim, state.FakedNUMAID, reclaimPoolSizeOfNonBindingNUMAs)

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ func TestAssembleProvision(t *testing.T) {
918918
},
919919
},
920920
expectPoolOverlapInfo: map[string]map[int]map[string]int{
921-
"reclaim": {-1: map[string]int{"share-a": 15, "share-b": 19}},
921+
"reclaim": {-1: map[string]int{"share-a": 14, "share-b": 20}},
922922
},
923923
},
924924
}

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,37 @@ func getNUMAsResource(resources map[int]int, numas machine.CPUSet) int {
3232
return res
3333
}
3434

35+
func regulateOverlapReclaimPoolSize(sharePoolSizes map[string]int, overlapReclaimPoolSizeRequired int) (map[string]int, error) {
36+
sharePoolSum := general.SumUpMapValues(sharePoolSizes)
37+
if overlapReclaimPoolSizeRequired > sharePoolSum {
38+
return nil, fmt.Errorf("invalid sharedOverlapReclaimSize")
39+
}
40+
41+
overlapReclaimPoolSizeRequiredLeft := overlapReclaimPoolSizeRequired
42+
sharedOverlapReclaimSize := make(map[string]int) // sharedPoolName -> reclaimedSize
43+
ps := general.SortedByValue(sharePoolSizes)
44+
for i := 0; i < len(ps); i++ {
45+
index := len(ps) - 1 - i
46+
sharePoolSize := ps[index].Value
47+
sharePoolName := ps[index].Key
48+
49+
size := int(math.Ceil(float64(overlapReclaimPoolSizeRequired*sharePoolSize) / float64(sharePoolSum)))
50+
if size > sharePoolSize {
51+
size = sharePoolSize
52+
}
53+
if size > overlapReclaimPoolSizeRequiredLeft {
54+
size = overlapReclaimPoolSizeRequiredLeft
55+
}
56+
sharedOverlapReclaimSize[sharePoolName] = size
57+
overlapReclaimPoolSizeRequiredLeft -= size
58+
if overlapReclaimPoolSizeRequiredLeft == 0 {
59+
break
60+
}
61+
}
62+
63+
return sharedOverlapReclaimSize, nil
64+
}
65+
3566
// regulatePoolSizes modifies pool size map to legal values, taking total available
3667
// resource and config such as enable reclaim into account. should be compatible with
3768
// any case and not return error. return true if reach resource upper bound.
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package provisionassembler
18+
19+
import (
20+
"fmt"
21+
"testing"
22+
23+
"github.com/stretchr/testify/require"
24+
)
25+
26+
type regulateOverlapReclaimPoolSizeCase struct {
27+
name string
28+
sharePoolSizes map[string]int
29+
overlapReclaimPoolSizeRequired int
30+
expectError error
31+
expectOverlapReclaimPoolSize map[string]int
32+
}
33+
34+
func Test_regulateOverlapReclaimPoolSize(t *testing.T) {
35+
t.Parallel()
36+
37+
testCases := []regulateOverlapReclaimPoolSizeCase{
38+
{
39+
name: "case1",
40+
sharePoolSizes: map[string]int{
41+
"share": 9,
42+
"bmq": 8,
43+
"flink": 7,
44+
},
45+
overlapReclaimPoolSizeRequired: 30,
46+
expectError: fmt.Errorf("invalid sharedOverlapReclaimSize"),
47+
expectOverlapReclaimPoolSize: nil,
48+
},
49+
{
50+
name: "case2",
51+
sharePoolSizes: map[string]int{
52+
"share": 9,
53+
"bmq": 8,
54+
"flink": 7,
55+
},
56+
overlapReclaimPoolSizeRequired: 2,
57+
expectError: nil,
58+
expectOverlapReclaimPoolSize: map[string]int{
59+
"share": 1,
60+
"bmq": 1,
61+
},
62+
},
63+
{
64+
name: "case3",
65+
sharePoolSizes: map[string]int{
66+
"share": 9,
67+
"bmq": 8,
68+
"flink": 7,
69+
},
70+
overlapReclaimPoolSizeRequired: 4,
71+
expectError: nil,
72+
expectOverlapReclaimPoolSize: map[string]int{
73+
"share": 2,
74+
"bmq": 2,
75+
},
76+
},
77+
{
78+
name: "case4",
79+
sharePoolSizes: map[string]int{
80+
"share": 9,
81+
"bmq": 8,
82+
"flink": 7,
83+
},
84+
overlapReclaimPoolSizeRequired: 8,
85+
expectError: nil,
86+
expectOverlapReclaimPoolSize: map[string]int{
87+
"share": 3,
88+
"bmq": 3,
89+
"flink": 2,
90+
},
91+
},
92+
}
93+
94+
for _, testCase := range testCases {
95+
func(testCase regulateOverlapReclaimPoolSizeCase) {
96+
t.Run(testCase.name, func(t *testing.T) {
97+
t.Parallel()
98+
99+
result, err := regulateOverlapReclaimPoolSize(testCase.sharePoolSizes, testCase.overlapReclaimPoolSizeRequired)
100+
if testCase.expectError == nil {
101+
require.NoError(t, err, "failed to regulate overlap reclaim pool size")
102+
} else {
103+
require.EqualError(t, err, testCase.expectError.Error(), "invalid error return")
104+
}
105+
require.Equal(t, testCase.expectOverlapReclaimPoolSize, result, "invalid result")
106+
})
107+
}(testCase)
108+
}
109+
}

0 commit comments

Comments
 (0)