Skip to content

Commit 322edea

Browse files
authored
Merge pull request #767 from luomingmeng/dev/add-nic-selection-result-annotation
feat(qrm): add nic selection result annotation
2 parents 3c445fa + a3d59cc commit 322edea

File tree

29 files changed

+1454
-156
lines changed

29 files changed

+1454
-156
lines changed

cmd/katalyst-agent/app/agent/qrm/network_plugin.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,23 @@ package qrm
1818

1919
import (
2020
"fmt"
21+
"strings"
2122
"sync"
2223

2324
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent"
25+
phconsts "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler/consts"
2426
"github.com/kubewharf/katalyst-core/pkg/config"
2527
)
2628

2729
const (
2830
// QRMPluginNameNetwork is the name string that identifies the network QRM plugin.
QRMPluginNameNetwork = "qrm_network_plugin"
2931
)
3032

33+
// QRMNetworkPluginPeriodicalHandlerGroupName is the periodical-handler group
// name for the network QRM plugin. It is built once at package initialization
// by joining QRMPluginNameNetwork and phconsts.PeriodicalHandlersGroupNameSuffix
// with phconsts.GroupNameSeparator.
var QRMNetworkPluginPeriodicalHandlerGroupName = strings.Join([]string{
34+
QRMPluginNameNetwork,
35+
phconsts.PeriodicalHandlersGroupNameSuffix,
36+
}, phconsts.GroupNameSeparator)
37+
3138
// networkPolicyInitializers is used to store the initializing function for network resource plugin policies
3239
var networkPolicyInitializers sync.Map
3340

cmd/katalyst-agent/app/options/qrm/network_plugin.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type NetworkOptions struct {
3838
NetInterfaceNameResourceAllocationAnnotationKey string
3939
NetClassIDResourceAllocationAnnotationKey string
4040
NetBandwidthResourceAllocationAnnotationKey string
41+
EnableNICAllocationReactor bool
4142
}
4243

4344
type NetClassOptions struct {
@@ -66,6 +67,7 @@ func NewNetworkOptions() *NetworkOptions {
6667
NetInterfaceNameResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/nic_name",
6768
NetClassIDResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/netcls_id",
6869
NetBandwidthResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/net_bandwidth",
70+
EnableNICAllocationReactor: true,
6971
}
7072
}
7173

@@ -106,6 +108,8 @@ func (o *NetworkOptions) AddFlags(fss *cliflag.NamedFlagSets) {
106108
o.NetClassIDResourceAllocationAnnotationKey, "The annotation key of allocated netcls id for the container, which is ready by runtime")
107109
fs.StringVar(&o.NetBandwidthResourceAllocationAnnotationKey, "network-resource-plugin-bandwidth-allocation-anno-key",
108110
o.NetBandwidthResourceAllocationAnnotationKey, "The annotation key of allocated bandwidth for the container, which is ready by runtime")
111+
fs.BoolVar(&o.EnableNICAllocationReactor, "enable-network-resource-plugin-nic-allocation-reactor",
112+
o.EnableNICAllocationReactor, "enable network allocation reactor, default is true")
109113
}
110114

111115
func (o *NetworkOptions) ApplyTo(conf *qrmconfig.NetworkQRMPluginConfig) error {
@@ -126,6 +130,7 @@ func (o *NetworkOptions) ApplyTo(conf *qrmconfig.NetworkQRMPluginConfig) error {
126130
conf.NetInterfaceNameResourceAllocationAnnotationKey = o.NetInterfaceNameResourceAllocationAnnotationKey
127131
conf.NetClassIDResourceAllocationAnnotationKey = o.NetClassIDResourceAllocationAnnotationKey
128132
conf.NetBandwidthResourceAllocationAnnotationKey = o.NetBandwidthResourceAllocationAnnotationKey
133+
conf.EnableNICAllocationReactor = o.EnableNICAllocationReactor
129134

130135
return nil
131136
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package commonstate
18+
19+
// Allocation exposes getter methods for the metadata attached to an
// allocation: pod identity (UID, namespace, name), container identity
// (name, type, index), owner pool name, pod role and type, labels,
// annotations, and QoS level.
// NOTE(review): implementers are not visible here — confirm which types
// are expected to satisfy this interface.
type Allocation interface {
20+
GetPodUid() string
21+
GetPodNamespace() string
22+
GetPodName() string
23+
GetContainerName() string
24+
GetContainerType() string
25+
GetContainerIndex() uint64
26+
GetOwnerPoolName() string
27+
GetPodRole() string
28+
GetPodType() string
29+
GetLabels() map[string]string
30+
GetAnnotations() map[string]string
31+
GetQoSLevel() string
32+
}

pkg/agent/qrm-plugins/commonstate/state.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,50 @@ type AllocationMeta struct {
4343
QoSLevel string `json:"qosLevel"`
4444
}
4545

46+
// GetPodUid returns the PodUid field of the allocation meta.
func (am *AllocationMeta) GetPodUid() string {
47+
return am.PodUid
48+
}
49+
50+
// GetPodNamespace returns the PodNamespace field of the allocation meta.
func (am *AllocationMeta) GetPodNamespace() string {
51+
return am.PodNamespace
52+
}
53+
54+
// GetPodName returns the PodName field of the allocation meta.
func (am *AllocationMeta) GetPodName() string {
55+
return am.PodName
56+
}
57+
58+
// GetContainerName returns the ContainerName field of the allocation meta.
func (am *AllocationMeta) GetContainerName() string {
59+
return am.ContainerName
60+
}
61+
62+
// GetContainerType returns the ContainerType field of the allocation meta
// as its stored string form.
func (am *AllocationMeta) GetContainerType() string {
63+
return am.ContainerType
64+
}
65+
66+
// GetContainerIndex returns the ContainerIndex field of the allocation meta.
func (am *AllocationMeta) GetContainerIndex() uint64 {
67+
return am.ContainerIndex
68+
}
69+
70+
// GetPodRole returns the PodRole field of the allocation meta.
func (am *AllocationMeta) GetPodRole() string {
71+
return am.PodRole
72+
}
73+
74+
// GetPodType returns the PodType field of the allocation meta.
func (am *AllocationMeta) GetPodType() string {
75+
return am.PodType
76+
}
77+
78+
// GetLabels returns the Labels map of the allocation meta. The map is
// returned as-is rather than copied, so writes through the returned value
// are visible in the meta.
func (am *AllocationMeta) GetLabels() map[string]string {
79+
return am.Labels
80+
}
81+
82+
// GetAnnotations returns the Annotations map of the allocation meta. The map
// is returned as-is rather than copied, so writes through the returned value
// are visible in the meta.
func (am *AllocationMeta) GetAnnotations() map[string]string {
83+
return am.Annotations
84+
}
85+
86+
// GetQoSLevel returns the QoSLevel field of the allocation meta.
func (am *AllocationMeta) GetQoSLevel() string {
87+
return am.QoSLevel
88+
}
89+
4690
func (am *AllocationMeta) Clone() *AllocationMeta {
4791
clone := &AllocationMeta{
4892
PodUid: am.PodUid,
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package commonstate
18+
19+
import (
20+
"testing"
21+
22+
"github.com/stretchr/testify/require"
23+
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
24+
25+
"github.com/kubewharf/katalyst-api/pkg/consts"
26+
cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
27+
)
28+
29+
// TestAllocationMetaGetters exercises AllocationMeta through independent,
// parallel subtests: the field getters, Clone, the main/sidecar container
// checks, pool-name resolution, and the QoS-level and NUMA-binding checks.
func TestAllocationMetaGetters(t *testing.T) {
30+
t.Parallel()
31+
32+
t.Run("normal pod name", func(t *testing.T) {
33+
t.Parallel()
34+
meta := &AllocationMeta{PodName: "test-pod"}
35+
require.Equal(t, "test-pod", meta.GetPodName())
36+
})
37+
38+
// A zero-value meta must report an empty pod name.
t.Run("empty pod name", func(t *testing.T) {
39+
t.Parallel()
40+
meta := &AllocationMeta{}
41+
require.Equal(t, "", meta.GetPodName())
42+
})
43+
44+
// Clone must carry the pod name over to the copy.
t.Run("cloned meta", func(t *testing.T) {
45+
t.Parallel()
46+
original := &AllocationMeta{
47+
PodName: "original-pod",
48+
Labels: map[string]string{"key": "value"},
49+
}
50+
clone := original.Clone()
51+
require.Equal(t, original.GetPodName(), clone.GetPodName())
52+
})
53+
54+
// Test other getters
55+
t.Run("pod uid getter", func(t *testing.T) {
56+
t.Parallel()
57+
meta := &AllocationMeta{PodUid: "test-uid"}
58+
require.Equal(t, "test-uid", meta.GetPodUid())
59+
})
60+
61+
t.Run("pod namespace getter", func(t *testing.T) {
62+
t.Parallel()
63+
meta := &AllocationMeta{PodNamespace: "test-ns"}
64+
require.Equal(t, "test-ns", meta.GetPodNamespace())
65+
})
66+
67+
t.Run("container name getter", func(t *testing.T) {
68+
t.Parallel()
69+
meta := &AllocationMeta{ContainerName: "test-container"}
70+
require.Equal(t, "test-container", meta.GetContainerName())
71+
})
72+
73+
// Test container type checks
74+
t.Run("main container check", func(t *testing.T) {
75+
t.Parallel()
76+
meta := &AllocationMeta{ContainerType: pluginapi.ContainerType_MAIN.String()}
77+
require.True(t, meta.CheckMainContainer())
78+
require.False(t, meta.CheckSideCar())
79+
})
80+
81+
t.Run("sidecar container check", func(t *testing.T) {
82+
t.Parallel()
83+
meta := &AllocationMeta{ContainerType: pluginapi.ContainerType_SIDECAR.String()}
84+
require.True(t, meta.CheckSideCar())
85+
require.False(t, meta.CheckMainContainer())
86+
})
87+
88+
// Enhanced clone test
89+
// Mutating the clone (including its Annotations map) must leave the
// original untouched, i.e. Clone may not share the map with its source.
t.Run("deep clone verification", func(t *testing.T) {
90+
t.Parallel()
91+
original := &AllocationMeta{
92+
Annotations: map[string]string{"key": "value"},
93+
QoSLevel: "test-qos",
94+
ContainerIndex: 1,
95+
}
96+
clone := original.Clone()
97+
clone.Annotations["key"] = "modified"
98+
clone.QoSLevel = "changed"
99+
clone.ContainerIndex = 2
100+
101+
require.Equal(t, "value", original.Annotations["key"])
102+
require.Equal(t, "test-qos", original.QoSLevel)
103+
require.Equal(t, uint64(1), original.ContainerIndex)
104+
})
105+
106+
// Pool name tests
107+
t.Run("pool name with owner", func(t *testing.T) {
108+
t.Parallel()
109+
meta := &AllocationMeta{OwnerPoolName: "test-pool"}
110+
require.Equal(t, "test-pool", meta.GetPoolName())
111+
})
112+
113+
// With no owner pool set, only a substring of the derived pool name is
// asserted; the exact name is intentionally not pinned here.
t.Run("pool name from qos", func(t *testing.T) {
114+
t.Parallel()
115+
meta := &AllocationMeta{QoSLevel: "shared_cores"}
116+
require.Contains(t, meta.GetPoolName(), "share")
117+
})
118+
119+
t.Run("pod role getter", func(t *testing.T) {
120+
t.Parallel()
121+
meta := &AllocationMeta{PodRole: "test-role"}
122+
require.Equal(t, "test-role", meta.GetPodRole())
123+
})
124+
125+
t.Run("pod type getter", func(t *testing.T) {
126+
t.Parallel()
127+
meta := &AllocationMeta{PodType: "test-type"}
128+
require.Equal(t, "test-type", meta.GetPodType())
129+
})
130+
131+
t.Run("labels getter", func(t *testing.T) {
132+
t.Parallel()
133+
meta := &AllocationMeta{Labels: map[string]string{"key": "value"}}
134+
require.Equal(t, "value", meta.GetLabels()["key"])
135+
})
136+
137+
t.Run("annotations getter", func(t *testing.T) {
138+
t.Parallel()
139+
meta := &AllocationMeta{Annotations: map[string]string{"anno": "data"}}
140+
require.Equal(t, "data", meta.GetAnnotations()["anno"])
141+
})
142+
143+
t.Run("qos level getter", func(t *testing.T) {
144+
t.Parallel()
145+
meta := &AllocationMeta{QoSLevel: "dedicated"}
146+
require.Equal(t, "dedicated", meta.GetQoSLevel())
147+
})
148+
149+
t.Run("check dedicated cores", func(t *testing.T) {
150+
t.Parallel()
151+
meta := &AllocationMeta{QoSLevel: consts.PodAnnotationQoSLevelDedicatedCores}
152+
require.True(t, meta.CheckDedicated())
153+
})
154+
155+
t.Run("check shared cores", func(t *testing.T) {
156+
t.Parallel()
157+
meta := &AllocationMeta{QoSLevel: consts.PodAnnotationQoSLevelSharedCores}
158+
require.True(t, meta.CheckShared())
159+
})
160+
161+
t.Run("check numa binding", func(t *testing.T) {
162+
t.Parallel()
163+
meta := &AllocationMeta{
164+
Annotations: map[string]string{
165+
consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable,
166+
},
167+
}
168+
require.True(t, meta.CheckNUMABinding())
169+
})
170+
171+
// Only the NUMA hint annotation is set here (no NUMA-binding enhancement
// annotation), and CheckActualNUMABinding is still expected to be true.
t.Run("check actual numa binding", func(t *testing.T) {
172+
t.Parallel()
173+
meta := &AllocationMeta{
174+
Annotations: map[string]string{
175+
cpuconsts.CPUStateAnnotationKeyNUMAHint: "0",
176+
},
177+
}
178+
require.True(t, meta.CheckActualNUMABinding())
179+
})
180+
}

pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,13 @@ import (
4141
memconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/consts"
4242
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor"
4343
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/oom"
44-
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor"
44+
memoryreactor "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor"
4545
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state"
4646
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/fragmem"
4747
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/logcache"
4848
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/sockmem"
4949
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
50+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor"
5051
"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler"
5152
"github.com/kubewharf/katalyst-core/pkg/config"
5253
dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
@@ -154,7 +155,6 @@ type DynamicPolicy struct {
154155
enableReclaimNUMABinding bool
155156
enableNonBindingShareCoresMemoryResourceCheck bool
156157

157-
enableNUMAAllocationReactor bool
158158
numaAllocationReactor reactor.AllocationReactor
159159
numaBindResultResourceAllocationAnnotationKey string
160160
}
@@ -225,7 +225,6 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
225225
enableEvictingLogCache: conf.EnableEvictingLogCache,
226226
enableReclaimNUMABinding: conf.EnableReclaimNUMABinding,
227227
enableNonBindingShareCoresMemoryResourceCheck: conf.EnableNonBindingShareCoresMemoryResourceCheck,
228-
enableNUMAAllocationReactor: conf.EnableNUMAAllocationReactor,
229228
numaBindResultResourceAllocationAnnotationKey: conf.NUMABindResultResourceAllocationAnnotationKey,
230229
}
231230

@@ -280,11 +279,12 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
280279
}
281280

282281
policyImplement.numaAllocationReactor = reactor.DummyAllocationReactor{}
283-
if policyImplement.enableNUMAAllocationReactor {
284-
policyImplement.numaAllocationReactor = reactor.NewNUMAAllocationReactor(
285-
agentCtx.MetaServer.PodFetcher,
286-
agentCtx.Client.KubeClient,
287-
)
282+
if conf.EnableNUMAAllocationReactor {
283+
policyImplement.numaAllocationReactor = memoryreactor.NewNUMAPodAllocationReactor(
284+
reactor.NewPodAllocationReactor(
285+
agentCtx.MetaServer.PodFetcher,
286+
agentCtx.Client.KubeClient,
287+
))
288288
}
289289

290290
return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil

pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ import (
5959
memconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/consts"
6060
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor"
6161
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/oom"
62-
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor"
6362
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state"
6463
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
64+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor"
6565
"github.com/kubewharf/katalyst-core/pkg/config"
6666
configagent "github.com/kubewharf/katalyst-core/pkg/config/agent"
6767
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"

0 commit comments

Comments
 (0)