Skip to content

Commit 27280f3

Browse files
authored
Merge pull request #841 from yehlemias/dev/rootfs-eviction-for-abnormal-pod
feat(eviction): support rootfs overuse eviction plugin
2 parents a8b477f + 4dbdeda commit 27280f3

File tree

9 files changed

+621
-9
lines changed

9 files changed

+621
-9
lines changed

cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/rootfs_pressure_eviction.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"k8s.io/apimachinery/pkg/api/resource"
2222
cliflag "k8s.io/component-base/cli/flag"
2323

24+
"github.com/kubewharf/katalyst-api/pkg/consts"
2425
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
2526
)
2627

@@ -33,6 +34,16 @@ const (
3334
defaultReclaimedQoSPodUsedPriorityThreshold = ""
3435
defaultReclaimedQoSPodInodesUsedPriorityThreshold = ""
3536
defaultMinimumImageFsDiskSizeThreshold = "10Gi"
37+
38+
defaultEnableRootfsOveruseEviction = false
39+
defaultSharedQoSRootfsOveruseThreshold = ""
40+
defaultReclaimedQoSRootfsOveruseThreshold = ""
41+
defaultRootfsOveruseEvictionCount = 1
42+
)
43+
44+
var (
45+
defaultSupportedQoSLevels = []string{consts.PodAnnotationQoSLevelSharedCores}
46+
defaultSharedQoSNamespaceFilter = []string{"default"}
3647
)
3748

3849
type RootfsPressureEvictionOptions struct {
@@ -45,6 +56,15 @@ type RootfsPressureEvictionOptions struct {
4556
ReclaimedQoSPodInodesUsedPriorityThreshold string
4657
MinimumImageFsDiskCapacityThreshold string
4758
GracePeriod int64
59+
60+
EnableRootfsOveruseEviction bool
61+
RootfsOveruseEvictionSupportedQoSLevels []string
62+
// RootfsOveruseEviction only supports shared qos and reclaimed qos, so
63+
// only thresholds for shared cores and reclaimed cores can be configured.
64+
SharedQoSRootfsOveruseThreshold string
65+
ReclaimedQoSRootfsOveruseThreshold string
66+
RootfsOveruseEvictionCount int
67+
SharedQoSNamespaceFilter []string
4868
}
4969

5070
func NewRootfsPressureEvictionOptions() *RootfsPressureEvictionOptions {
@@ -58,6 +78,12 @@ func NewRootfsPressureEvictionOptions() *RootfsPressureEvictionOptions {
5878
ReclaimedQoSPodInodesUsedPriorityThreshold: defaultReclaimedQoSPodInodesUsedPriorityThreshold,
5979
MinimumImageFsDiskCapacityThreshold: defaultMinimumImageFsDiskSizeThreshold,
6080
GracePeriod: defaultGracePeriod,
81+
EnableRootfsOveruseEviction: defaultEnableRootfsOveruseEviction,
82+
RootfsOveruseEvictionSupportedQoSLevels: defaultSupportedQoSLevels,
83+
SharedQoSRootfsOveruseThreshold: defaultSharedQoSRootfsOveruseThreshold,
84+
ReclaimedQoSRootfsOveruseThreshold: defaultReclaimedQoSRootfsOveruseThreshold,
85+
RootfsOveruseEvictionCount: defaultRootfsOveruseEvictionCount,
86+
SharedQoSNamespaceFilter: defaultSharedQoSNamespaceFilter,
6187
}
6288
}
6389

@@ -81,6 +107,11 @@ func (o *RootfsPressureEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
81107
"the minimum image fs disk capacity for nodes. the eviction manager will ignore those nodes whose image fs disk capacity is less than this threshold")
82108
fs.Int64Var(&o.GracePeriod, "eviction-rootfs-grace-period", 0,
83109
"the grace period of pod deletion")
110+
111+
fs.BoolVar(&o.EnableRootfsOveruseEviction, "eviction-rootfs-overuse-enable", o.EnableRootfsOveruseEviction, "set true to enable rootfs overuse eviction")
112+
fs.StringSliceVar(&o.RootfsOveruseEvictionSupportedQoSLevels, "eviction-rootfs-overuse-supported-qos-levels", o.RootfsOveruseEvictionSupportedQoSLevels, "the supported qos levels for rootfs overuse eviction, supported qos levels are: shared, reclaimed")
113+
fs.StringVar(&o.SharedQoSRootfsOveruseThreshold, "eviction-rootfs-overuse-shared-qos-threshold", o.SharedQoSRootfsOveruseThreshold, "the shared qos rootfs overuse threshold for shared qos pods. example 500Gi, 20%")
114+
fs.StringVar(&o.ReclaimedQoSRootfsOveruseThreshold, "eviction-rootfs-overuse-reclaimed-qos-threshold", o.ReclaimedQoSRootfsOveruseThreshold, "the reclaimed qos rootfs overuse threshold for reclaimed qos pods. example 500Gi, 20%")
84115
}
85116

86117
func (o *RootfsPressureEvictionOptions) ApplyTo(c *eviction.RootfsPressureEvictionConfiguration) error {
@@ -135,5 +166,25 @@ func (o *RootfsPressureEvictionOptions) ApplyTo(c *eviction.RootfsPressureEvicti
135166
c.MinimumImageFsDiskCapacityThreshold = &value
136167
}
137168
c.GracePeriod = o.GracePeriod
169+
170+
c.EnableRootfsOveruseEviction = o.EnableRootfsOveruseEviction
171+
c.RootfsOveruseEvictionSupportedQoSLevels = o.RootfsOveruseEvictionSupportedQoSLevels
172+
c.RootfsOveruseEvictionCount = o.RootfsOveruseEvictionCount
173+
c.SharedQoSNamespaceFilter = o.SharedQoSNamespaceFilter
174+
if o.SharedQoSRootfsOveruseThreshold != "" {
175+
value, err := eviction.ParseThresholdValue(o.SharedQoSRootfsOveruseThreshold)
176+
if err != nil {
177+
return errors.Wrapf(err, "failed to parse option: 'eviction-rootfs-overuse-shared-qos-rootfs-overuse-threshold'")
178+
}
179+
c.SharedQoSRootfsOveruseThreshold = value
180+
}
181+
if o.ReclaimedQoSRootfsOveruseThreshold != "" {
182+
value, err := eviction.ParseThresholdValue(o.ReclaimedQoSRootfsOveruseThreshold)
183+
if err != nil {
184+
return errors.Wrapf(err, "failed to parse option: 'eviction-rootfs-overuse-reclaimed-qos-rootfs-overuse-threshold'")
185+
}
186+
c.ReclaimedQoSRootfsOveruseThreshold = value
187+
}
188+
138189
return nil
139190
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ require (
1919
github.com/google/uuid v1.3.0
2020
github.com/h2non/gock v1.2.0
2121
github.com/klauspost/cpuid/v2 v2.2.6
22-
github.com/kubewharf/katalyst-api v0.5.3-0.20250506060349-cf1eb8ced1f2
22+
github.com/kubewharf/katalyst-api v0.5.4-0.20250526112725-41ffbbe244ee
2323
github.com/moby/sys/mountinfo v0.6.2
2424
github.com/montanaflynn/stats v0.7.1
2525
github.com/opencontainers/runc v1.1.6

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
573573
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
574574
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
575575
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
576-
github.com/kubewharf/katalyst-api v0.5.3-0.20250506060349-cf1eb8ced1f2 h1:0UOLOKhTjJ8JksAm0oq4FmQjlG4k/WGYPQ1zxPRGgxg=
577-
github.com/kubewharf/katalyst-api v0.5.3-0.20250506060349-cf1eb8ced1f2/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
576+
github.com/kubewharf/katalyst-api v0.5.4-0.20250526112725-41ffbbe244ee h1:dwv1CubzMbbCLzyb/NAQO6mTK/j8sydc/DO36vBHGUg=
577+
github.com/kubewharf/katalyst-api v0.5.4-0.20250526112725-41ffbbe244ee/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
578578
github.com/kubewharf/kubelet v1.24.6-kubewharf.9 h1:jOTYZt7h/J7I8xQMKMUcJjKf5UFBv37jHWvNp5VRFGc=
579579
github.com/kubewharf/kubelet v1.24.6-kubewharf.9/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
580580
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=

pkg/agent/evictionmanager/manager.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ func NewInnerEvictionPluginInitializers() map[string]plugin.InitFunc {
134134
innerEvictionPluginInitializers[memory.EvictionPluginNameRssOveruse] = memory.NewRssOveruseEvictionPlugin
135135
innerEvictionPluginInitializers[rootfs.EvictionPluginNamePodRootfsPressure] = rootfs.NewPodRootfsPressureEvictionPlugin
136136
innerEvictionPluginInitializers[network.EvictionPluginNameNetwork] = network.NewNICEvictionPlugin
137+
innerEvictionPluginInitializers[rootfs.EvictionPluginNamePodRootfsOveruse] = rootfs.NewPodRootfsOveruseEvictionPlugin
137138
return innerEvictionPluginInitializers
138139
}
139140

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package rootfs
18+
19+
import (
20+
"context"
21+
"errors"
22+
"fmt"
23+
"sort"
24+
"time"
25+
26+
v1 "k8s.io/api/core/v1"
27+
"k8s.io/apimachinery/pkg/api/resource"
28+
"k8s.io/apimachinery/pkg/util/sets"
29+
"k8s.io/client-go/tools/events"
30+
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
31+
32+
apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"
33+
pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
34+
"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin"
35+
"github.com/kubewharf/katalyst-core/pkg/client"
36+
"github.com/kubewharf/katalyst-core/pkg/config"
37+
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
38+
"github.com/kubewharf/katalyst-core/pkg/config/generic"
39+
"github.com/kubewharf/katalyst-core/pkg/consts"
40+
"github.com/kubewharf/katalyst-core/pkg/metaserver"
41+
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper"
42+
"github.com/kubewharf/katalyst-core/pkg/metrics"
43+
"github.com/kubewharf/katalyst-core/pkg/util/general"
44+
"github.com/kubewharf/katalyst-core/pkg/util/process"
45+
)
46+
47+
// Plugin identity and the safety floors applied to configured thresholds.
const (
	// EvictionPluginNamePodRootfsOveruse is the name this plugin reports
	// from Name() and is registered under in the eviction manager.
	EvictionPluginNamePodRootfsOveruse = "rootfs-overuse-eviction-plugin"

	// minRootfsOveruseThreshold is the smallest absolute threshold honoured
	// (20GiB); smaller configured quantities are raised to this value.
	minRootfsOveruseThreshold = 20 << 30
	// minRootfsOverusePercentageThreshold is the smallest percentage
	// threshold honoured (10%); smaller configured percentages are raised
	// to this value.
	minRootfsOverusePercentageThreshold float32 = 0.1
)
53+
54+
// PodRootfsOveruseEvictionPlugin implements the EvictPlugin interface.
55+
type PodRootfsOveruseEvictionPlugin struct {
56+
*process.StopControl
57+
pluginName string
58+
dynamicConfig *dynamic.DynamicAgentConfiguration
59+
metaServer *metaserver.MetaServer
60+
qosConf *generic.QoSConfiguration
61+
emitter metrics.MetricEmitter
62+
}
63+
64+
func NewPodRootfsOveruseEvictionPlugin(_ *client.GenericClientSet, _ events.EventRecorder,
65+
metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, conf *config.Configuration,
66+
) plugin.EvictionPlugin {
67+
return &PodRootfsOveruseEvictionPlugin{
68+
pluginName: EvictionPluginNamePodRootfsOveruse,
69+
metaServer: metaServer,
70+
StopControl: process.NewStopControl(time.Time{}),
71+
dynamicConfig: conf.DynamicAgentConfiguration,
72+
qosConf: conf.GenericConfiguration.QoSConfiguration,
73+
emitter: emitter,
74+
}
75+
}
76+
77+
func (r *PodRootfsOveruseEvictionPlugin) Name() string {
78+
if r == nil {
79+
return ""
80+
}
81+
82+
return r.pluginName
83+
}
84+
85+
// Start is a no-op: the plugin does all of its work inside GetEvictPods.
func (r *PodRootfsOveruseEvictionPlugin) Start() {
	// Nothing to start.
}
86+
87+
func (r *PodRootfsOveruseEvictionPlugin) ThresholdMet(_ context.Context) (*pluginapi.ThresholdMetResponse, error) {
88+
return &pluginapi.ThresholdMetResponse{
89+
MetType: pluginapi.ThresholdMetType_NOT_MET,
90+
}, nil
91+
}
92+
93+
func (r *PodRootfsOveruseEvictionPlugin) GetTopEvictionPods(_ context.Context, _ *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error) {
94+
return &pluginapi.GetTopEvictionPodsResponse{}, nil
95+
}
96+
97+
func (r *PodRootfsOveruseEvictionPlugin) GetEvictPods(_ context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error) {
98+
if request == nil {
99+
return nil, fmt.Errorf("GetEvictPods got nil request")
100+
}
101+
102+
rootfsEvictionConfig := r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration
103+
if !rootfsEvictionConfig.EnableRootfsOveruseEviction {
104+
return &pluginapi.GetEvictPodsResponse{}, nil
105+
}
106+
107+
if len(request.ActivePods) == 0 {
108+
return &pluginapi.GetEvictPodsResponse{}, nil
109+
}
110+
111+
filterPods := r.filterPods(request.ActivePods)
112+
113+
var usageItemList podUsageList
114+
for _, pod := range filterPods {
115+
threshold := r.getRootfsOveruseThreshold(pod)
116+
if threshold == nil {
117+
continue
118+
}
119+
120+
used, capacity, err := r.getPodRootfsUsed(pod)
121+
if err != nil {
122+
general.Warningf("Failed to get pod rootfs usage for %s: %q", pod.UID, err)
123+
continue
124+
}
125+
126+
if rootfsOveruseThresholdMet(used, capacity, threshold) {
127+
podUsageItem := podUsageItem{
128+
pod: pod,
129+
usage: used,
130+
capacity: capacity,
131+
threshold: getThresholdValue(threshold, capacity),
132+
}
133+
usageItemList = append(usageItemList, podUsageItem)
134+
}
135+
}
136+
137+
if len(usageItemList) == 0 {
138+
return &pluginapi.GetEvictPodsResponse{}, nil
139+
}
140+
141+
sort.Sort(usageItemList)
142+
result := make([]*pluginapi.EvictPod, 0)
143+
deletionOptions := &pluginapi.DeletionOptions{
144+
GracePeriodSeconds: rootfsEvictionConfig.GracePeriod,
145+
}
146+
for i := 0; i < rootfsEvictionConfig.RootfsOveruseEvictionCount && i < len(usageItemList); i++ {
147+
item := usageItemList[i]
148+
evictPod := &pluginapi.EvictPod{
149+
Pod: item.pod,
150+
Reason: fmt.Sprintf("rootfs overuse threshold met, used: %d, threshold: %d", item.usage, item.threshold),
151+
}
152+
if deletionOptions.GracePeriodSeconds > 0 {
153+
evictPod.DeletionOptions = deletionOptions
154+
}
155+
result = append(result, evictPod)
156+
general.InfoS("rootfs overuse threshold met", "pod", item.pod.Name, "used", item.usage, "threshold", item.threshold)
157+
}
158+
return &pluginapi.GetEvictPodsResponse{EvictPods: result}, nil
159+
}
160+
161+
func (r *PodRootfsOveruseEvictionPlugin) getPodRootfsUsed(pod *v1.Pod) (int64, int64, error) {
162+
podRootfsUsed, err := helper.GetPodMetric(r.metaServer.MetricsFetcher, r.emitter, pod, consts.MetricsContainerRootfsUsed, -1)
163+
if err != nil {
164+
return 0, 0, err
165+
}
166+
167+
rootfsCapacity, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsCapacity)
168+
if err != nil {
169+
return 0, 0, err
170+
}
171+
172+
if rootfsCapacity < 1 {
173+
return 0, 0, errors.New("invalid rootfs capacity")
174+
}
175+
176+
return int64(podRootfsUsed), int64(rootfsCapacity), nil
177+
}
178+
179+
func (r *PodRootfsOveruseEvictionPlugin) getRootfsOveruseThreshold(pod *v1.Pod) *evictionapi.ThresholdValue {
180+
qosLevel, err := r.qosConf.GetQoSLevelForPod(pod)
181+
if err != nil {
182+
return nil
183+
}
184+
185+
switch qosLevel {
186+
case string(apiconsts.QoSLevelSharedCores):
187+
return getRootfsOveruseThreshold(r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration.SharedQoSRootfsOveruseThreshold)
188+
case string(apiconsts.QoSLevelReclaimedCores):
189+
return getRootfsOveruseThreshold(r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration.ReclaimedQoSRootfsOveruseThreshold)
190+
default:
191+
}
192+
return nil
193+
}
194+
195+
func (r *PodRootfsOveruseEvictionPlugin) filterPods(pods []*v1.Pod) []*v1.Pod {
196+
supportedQoSLevels := sets.NewString(r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration.RootfsOveruseEvictionSupportedQoSLevels...)
197+
sharedQoSNamespaceFilter := sets.NewString(r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration.SharedQoSNamespaceFilter...)
198+
199+
filteredPods := make([]*v1.Pod, 0, len(pods))
200+
for _, pod := range pods {
201+
qosLevel, err := r.qosConf.GetQoSLevelForPod(pod)
202+
if err != nil {
203+
continue
204+
}
205+
if !supportedQoSLevels.Has(qosLevel) {
206+
continue
207+
}
208+
if qosLevel == apiconsts.PodAnnotationQoSLevelSharedCores && !sharedQoSNamespaceFilter.Has(pod.Namespace) {
209+
continue
210+
}
211+
filteredPods = append(filteredPods, pod)
212+
}
213+
return filteredPods
214+
}
215+
216+
func rootfsOveruseThresholdMet(used, capacity int64, threshold *evictionapi.ThresholdValue) bool {
217+
if threshold == nil {
218+
return false
219+
}
220+
if thresholdValue := getThresholdValue(threshold, capacity); used > thresholdValue {
221+
return true
222+
}
223+
return false
224+
}
225+
226+
func getThresholdValue(threshold *evictionapi.ThresholdValue, capacity int64) int64 {
227+
if threshold == nil {
228+
return 0
229+
}
230+
if threshold.Quantity != nil {
231+
return threshold.Quantity.Value()
232+
}
233+
if threshold.Percentage > 0 && threshold.Percentage < 1 {
234+
return int64(float32(capacity) * threshold.Percentage)
235+
}
236+
return 0
237+
}
238+
239+
func getRootfsOveruseThreshold(threshold *evictionapi.ThresholdValue) *evictionapi.ThresholdValue {
240+
if threshold == nil {
241+
return nil
242+
}
243+
if threshold.Quantity != nil && threshold.Quantity.Value() < minRootfsOveruseThreshold {
244+
return &evictionapi.ThresholdValue{
245+
Quantity: resource.NewQuantity(minRootfsOveruseThreshold, resource.BinarySI),
246+
}
247+
}
248+
if threshold.Percentage > 0 && threshold.Percentage < 1 && threshold.Percentage < minRootfsOverusePercentageThreshold {
249+
return &evictionapi.ThresholdValue{
250+
Percentage: minRootfsOverusePercentageThreshold,
251+
}
252+
}
253+
return threshold
254+
}

0 commit comments

Comments
 (0)