Skip to content

Commit 32a3253

Browse files
authored
Merge pull request #710 from gary-lgy/gary/fix-numa-balancer-cgroupv1
fix(qrm): fix printing of numa mem migrate container stats
2 parents bb80416 + e2eee22 commit 32a3253

File tree

5 files changed

+168
-26
lines changed

5 files changed

+168
-26
lines changed

pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_advisor_handler.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -570,10 +570,10 @@ func (p *DynamicPolicy) handleNumaMemoryBalance(_ *config.Configuration,
570570
}
571571

572572
type containerMigrateStat struct {
573-
containerID string
574-
cgroupPath string
575-
rssBefore uint64
576-
rssAfter uint64
573+
ContainerID string
574+
CgroupPath string
575+
RssBefore uint64
576+
RssAfter uint64
577577
}
578578

579579
func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.NumaMemoryBalanceAdvice) error {
@@ -618,8 +618,8 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
618618

619619
key := keyFunc(containerInfo.PodUID, containerInfo.ContainerName)
620620
containerStats[key] = &containerMigrateStat{
621-
containerID: containerID,
622-
cgroupPath: memoryAbsCGPath,
621+
ContainerID: containerID,
622+
CgroupPath: memoryAbsCGPath,
623623
}
624624
}
625625

@@ -633,16 +633,16 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
633633
}
634634

635635
stats := containerStats[containerKey]
636-
rssBefore, err := getAnonOnNumaFunc(stats.cgroupPath, advice.SourceNuma)
636+
rssBefore, err := getAnonOnNumaFunc(stats.CgroupPath, advice.SourceNuma)
637637
if err != nil {
638638
general.Errorf("getAnonOnNumaFunc failed for container[%v/%v] numa [%v],err: %v",
639639
containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, err)
640640
}
641-
stats.rssBefore = rssBefore
641+
stats.RssBefore = rssBefore
642642

643643
containerNumaSet := machine.NewCPUSet(containerInfo.DestNumaList...)
644644
if containerNumaSet.Contains(destNuma) {
645-
err = MigratePagesForContainer(ctx, containerInfo.PodUID, stats.containerID, p.topology.NumNUMANodes,
645+
err = MigratePagesForContainer(ctx, containerInfo.PodUID, stats.ContainerID, p.topology.NumNUMANodes,
646646
machine.NewCPUSet(advice.SourceNuma), machine.NewCPUSet(destNuma))
647647
if err != nil {
648648
general.Errorf("MigratePagesForContainer failed for container[%v/%v] source_numa [%v],dest_numa [%v],err: %v",
@@ -652,12 +652,12 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
652652
general.Infof("skip migrate container %v/%v memory from %v to %v", containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, advice.DestNumaList)
653653
}
654654

655-
rssAfter, err := getAnonOnNumaFunc(stats.cgroupPath, advice.SourceNuma)
655+
rssAfter, err := getAnonOnNumaFunc(stats.CgroupPath, advice.SourceNuma)
656656
if err != nil {
657657
general.Errorf("getAnonOnNumaFunc failed for container[%v/%v] numa [%v],err: %v",
658658
containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, err)
659659
}
660-
stats.rssAfter = rssAfter
660+
stats.RssAfter = rssAfter
661661

662662
if rssAfter > rssBefore {
663663
general.Infof("rssAfter: %d greater than rssBefore: %d", rssAfter, rssBefore)
@@ -673,8 +673,8 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
673673
containerStatJson, _ := json.Marshal(containerStats)
674674
general.Infof("containerStats: %+v", string(containerStatJson))
675675
for _, stat := range containerStats {
676-
totalRSSAfter += stat.rssAfter
677-
totalRSSBefore += stat.rssBefore
676+
totalRSSAfter += stat.RssAfter
677+
totalRSSBefore += stat.RssBefore
678678
}
679679
general.Infof("numa memory balance migration succeed, advice total rss: %v, threshold: %v, sourceNuma:%v, targetNuma:%v, total rss before:%v, total rss after: %v",
680680
advice.TotalRSS, advice.Threshold, advice.SourceNuma, destNuma, totalRSSBefore, totalRSSAfter)

pkg/util/cgroup/common/cgroup_linux.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,23 +80,30 @@ func GetCgroupParamInt(cgroupPath, cgroupFile string) (int64, error) {
8080
return res, nil
8181
}
8282

83-
// ParseCgroupNumaValue parse cgroup numa stat files like memory.numa_stat, the format is like "anon N0=1686843392 N1=1069957120 N2=316747776 N3=163962880"
84-
func ParseCgroupNumaValue(cgroupPath, cgroupFile string) (map[string]map[int]uint64, error) {
85-
fileName := filepath.Join(cgroupPath, cgroupFile)
86-
content, err := ioutil.ReadFile(fileName)
87-
if err != nil {
88-
return nil, err
89-
}
83+
/*
84+
ParseCgroupNumaValue parse cgroup numa stat files like `memory.numa_stat`.
85+
86+
cgroup v1 format:
87+
<counter>=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
88+
hierarchical_<counter>=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
9089
90+
cgroup v2 format:
91+
<counter> N0=<bytes in node 0> N1=<bytes in node 1> ...
92+
*/
93+
func ParseCgroupNumaValue(content string) (map[string]map[int]uint64, error) {
9194
result := make(map[string]map[int]uint64)
92-
lines := strings.Split(string(content), "\n")
95+
lines := strings.Split(content, "\n")
9396
for _, line := range lines {
9497
cols := strings.Fields(line)
9598
if len(cols) <= 1 {
9699
continue
97100
}
98101

99102
key := cols[0]
103+
if index := strings.Index(key, "="); index != -1 {
104+
// For v1 format, remove the suffix after "="
105+
key = key[:index]
106+
}
100107
numaInfo := make(map[int]uint64)
101108
for _, pair := range cols[1:] {
102109
parts := strings.Split(pair, "=")
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
//go:build linux
2+
// +build linux
3+
4+
/*
5+
Copyright 2022 The Katalyst Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
*/
19+
20+
package common
21+
22+
import (
23+
"reflect"
24+
"testing"
25+
)
26+
27+
func TestParseCgroupNumaValue(t *testing.T) {
28+
t.Parallel()
29+
30+
type args struct {
31+
content string
32+
}
33+
tests := []struct {
34+
name string
35+
args args
36+
want map[string]map[int]uint64
37+
wantErr bool
38+
}{
39+
{
40+
name: "cgroupv1 format",
41+
args: args{
42+
content: `total=7587426 N0=92184 N1=21339 N2=104047 N3=7374122
43+
file=70686 N0=5353 N1=3096 N2=12817 N3=51844
44+
anon=7516740 N0=86831 N1=18243 N2=91230 N3=7322278
45+
unevictable=0 N0=0 N1=0 N2=0 N3=0`,
46+
},
47+
want: map[string]map[int]uint64{
48+
"total": {
49+
0: 92184,
50+
1: 21339,
51+
2: 104047,
52+
3: 7374122,
53+
},
54+
"file": {
55+
0: 5353,
56+
1: 3096,
57+
2: 12817,
58+
3: 51844,
59+
},
60+
"anon": {
61+
0: 86831,
62+
1: 18243,
63+
2: 91230,
64+
3: 7322278,
65+
},
66+
"unevictable": {
67+
0: 0,
68+
1: 0,
69+
2: 0,
70+
3: 0,
71+
},
72+
},
73+
wantErr: false,
74+
},
75+
{
76+
name: "cgroupv2 format",
77+
args: args{
78+
content: `anon N0=1629990912 N1=65225723904
79+
file N0=1892352 N1=37441536
80+
unevictable N0=0 N1=0`,
81+
},
82+
want: map[string]map[int]uint64{
83+
"anon": {
84+
0: 1629990912,
85+
1: 65225723904,
86+
},
87+
"file": {
88+
0: 1892352,
89+
1: 37441536,
90+
},
91+
"unevictable": {
92+
0: 0,
93+
1: 0,
94+
},
95+
},
96+
wantErr: false,
97+
},
98+
{
99+
name: "wrong separator",
100+
args: args{
101+
content: `anon N0:1629990912 N1:65225723904
102+
file N0:1892352 N1:37441536
103+
unevictable N0:0 N1:0`,
104+
},
105+
wantErr: true,
106+
},
107+
}
108+
for _, tt := range tests {
109+
tt := tt
110+
t.Run(tt.name, func(t *testing.T) {
111+
t.Parallel()
112+
113+
got, err := ParseCgroupNumaValue(tt.args.content)
114+
if (err != nil) != tt.wantErr {
115+
t.Errorf("ParseCgroupNumaValue() error = %v, wantErr %v", err, tt.wantErr)
116+
return
117+
}
118+
if !reflect.DeepEqual(got, tt.want) {
119+
t.Errorf("ParseCgroupNumaValue() = %v, want %v", got, tt.want)
120+
return
121+
}
122+
})
123+
}
124+
}

pkg/util/cgroup/manager/v1/fs_linux.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,15 @@ func (m *manager) GetMemory(absCgroupPath string) (*common.MemoryStats, error) {
239239
}
240240

241241
func (m *manager) GetNumaMemory(absCgroupPath string) (map[int]*common.MemoryNumaMetrics, error) {
242-
numaStat, err := common.ParseCgroupNumaValue(absCgroupPath, "memory.numa_stat")
242+
const fileName = "memory.numa_stat"
243+
content, err := libcgroups.ReadFile(absCgroupPath, fileName)
243244
if err != nil {
244-
return nil, err
245+
return nil, fmt.Errorf("failed to read %s: %w", fileName, err)
246+
}
247+
248+
numaStat, err := common.ParseCgroupNumaValue(content)
249+
if err != nil {
250+
return nil, fmt.Errorf("failed to parse numa stat: %w", err)
245251
}
246252

247253
pageSize := uint64(syscall.Getpagesize())

pkg/util/cgroup/manager/v2/fs_linux.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,15 @@ func (m *manager) GetMemory(absCgroupPath string) (*common.MemoryStats, error) {
282282
}
283283

284284
func (m *manager) GetNumaMemory(absCgroupPath string) (map[int]*common.MemoryNumaMetrics, error) {
285-
numaStat, err := common.ParseCgroupNumaValue(absCgroupPath, "memory.numa_stat")
286-
general.Infof("get cgroup %+v numa stat %+v", absCgroupPath, numaStat)
285+
const fileName = "memory.numa_stat"
286+
content, err := libcgroups.ReadFile(absCgroupPath, fileName)
287287
if err != nil {
288-
return nil, err
288+
return nil, fmt.Errorf("failed to read %s: %w", fileName, err)
289+
}
290+
291+
numaStat, err := common.ParseCgroupNumaValue(content)
292+
if err != nil {
293+
return nil, fmt.Errorf("failed to parse numa stat: %w", err)
289294
}
290295

291296
result := make(map[int]*common.MemoryNumaMetrics)

0 commit comments

Comments
 (0)