Skip to content

Commit 1d22194

Browse files
authored
Merge pull request #774 from h-w-chen/dev/pap-p1-no-evict
fix(sysadvisor): pap - level p1 action plan not to evict
2 parents fd243ab + 682ef1a commit 1d22194

File tree

4 files changed

+66
-9
lines changed

4 files changed

+66
-9
lines changed

pkg/agent/sysadvisor/plugin/poweraware/advisor/action/strategy/evict_first.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ type CapperProber interface {
4949
// evictFirstStrategy always attempts to evict low priority pods if any; only after all are exhausted will it resort to DVFS means.
5050
// besides, it will continue to try the best to meet the alert spec, regardless of the alert update time.
5151
// alert level has the following meanings in this strategy:
52-
// P1 - eviction only;
52+
// P2 - noop and expecting scheduler to bias against the node
53+
// P1 - noop and expecting scheduler not to schedule to the node
5354
// P0 - evict if applicable; otherwise conduct DVFS once if needed (DVFS is limited to 10%);
5455
// S0 - DVFS in urgency (no limit on DVFS)
5556
type evictFirstStrategy struct {
@@ -95,16 +96,17 @@ func (e *evictFirstStrategy) recommendEvictFirstOp() spec.InternalOp {
9596

9697
func (e *evictFirstStrategy) recommendOp(alert spec.PowerAlert, internalOp spec.InternalOp) spec.InternalOp {
9798
if internalOp != spec.InternalOpAuto {
98-
return internalOp
99+
// internal op is only applicable to dvfs related levels, i.e. s0 + p0
100+
if alert == spec.PowerAlertS0 || alert == spec.PowerAlertP0 {
101+
return internalOp
102+
}
99103
}
100104

101105
switch alert {
102106
case spec.PowerAlertS0:
103107
return spec.InternalOpFreqCap
104108
case spec.PowerAlertP0:
105109
return e.recommendEvictFirstOp()
106-
case spec.PowerAlertP1:
107-
return spec.InternalOpEvict
108110
default:
109111
return spec.InternalOpNoop
110112
}
@@ -126,8 +128,8 @@ func (e *evictFirstStrategy) adjustTargetForConstraintDVFS(actualWatt, desiredWa
126128
func (e *evictFirstStrategy) yieldActionPlan(op, internalOp spec.InternalOp, actualWatt, desiredWatt int, alert spec.PowerAlert, ttl time.Duration) action.PowerAction {
127129
switch op {
128130
case spec.InternalOpFreqCap:
129-
// try to conduct freq capping within the allowed limit if not set for hard dvfs
130-
if internalOp != spec.InternalOpFreqCap && !(alert == spec.PowerAlertS0 && internalOp == spec.InternalOpAuto) {
131+
// try to conduct freq capping within the allowed limit except for the unconstrained dvfs
132+
if alert != spec.PowerAlertS0 {
131133
var err error
132134
desiredWatt, err = e.adjustTargetForConstraintDVFS(actualWatt, desiredWatt)
133135
if err != nil {

pkg/agent/sysadvisor/plugin/poweraware/advisor/action/strategy/evict_first_test.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,23 +69,55 @@ func Test_evictFirstStrategy_RecommendAction(t *testing.T) {
6969
wantInDVFS bool
7070
}{
7171
{
72-
name: "internal op is always respected if exists",
72+
name: "plan of s0 always targets full range",
7373
fields: fields{
7474
coefficient: exponentialDecay{},
7575
evictableProber: nil,
7676
dvfsUsed: 0,
7777
},
7878
args: args{
79+
alert: spec.PowerAlertS0,
7980
actualWatt: 100,
8081
desiredWatt: 80,
81-
internalOp: spec.InternalOpFreqCap,
8282
},
8383
want: action.PowerAction{
8484
Op: spec.InternalOpFreqCap,
8585
Arg: 80,
8686
},
8787
wantInDVFS: true,
8888
},
89+
{
90+
name: "plan of p0 is constraint when allowing dvfs only",
91+
fields: fields{
92+
coefficient: exponentialDecay{},
93+
evictableProber: nil,
94+
dvfsUsed: 0,
95+
},
96+
args: args{
97+
alert: spec.PowerAlertP0,
98+
actualWatt: 100,
99+
desiredWatt: 80,
100+
internalOp: spec.InternalOpFreqCap,
101+
},
102+
want: action.PowerAction{
103+
Op: spec.InternalOpFreqCap,
104+
Arg: 90,
105+
},
106+
wantInDVFS: true,
107+
},
108+
{
109+
name: "p1 is noop",
110+
fields: fields{},
111+
args: args{
112+
actualWatt: 100,
113+
desiredWatt: 80,
114+
alert: spec.PowerAlertP1,
115+
},
116+
want: action.PowerAction{
117+
Op: spec.InternalOpNoop,
118+
Arg: 0,
119+
},
120+
},
89121
{
90122
name: "p2 is noop",
91123
fields: fields{},

pkg/agent/sysadvisor/plugin/poweraware/reader/metric_store_power_reader.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,12 @@ func (m *metricStorePowerReader) Get(ctx context.Context) (int, error) {
5555
func (m *metricStorePowerReader) get(ctx context.Context, now time.Time) (int, error) {
5656
data, err := m.GetNodeMetric(consts.MetricTotalPowerUsedWatts)
5757
if err != nil {
58-
return 0, errors.Wrap(err, "failed to get metric from metric store")
58+
return 0, errors.Wrap(err, "failed to get power usage from metric store")
59+
}
60+
61+
// 0 actually is error, typically caused by null response from malachite realtime power service
62+
if data.Value == 0 {
63+
return 0, errors.New("got invalid 0 power usage from metric store")
5964
}
6065

6166
if !isDataFresh(data, now) {

pkg/agent/sysadvisor/plugin/poweraware/reader/metric_store_power_reader_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import (
2121
"testing"
2222
"time"
2323

24+
"github.com/stretchr/testify/assert"
25+
2426
utilmetric "github.com/kubewharf/katalyst-core/pkg/util/metric"
2527
)
2628

@@ -101,3 +103,19 @@ func Test_metricStorePowerReader_get(t *testing.T) {
101103
})
102104
}
103105
}
106+
107+
func Test_metricStorePowerReader_get_0_is_error(t *testing.T) {
108+
t.Parallel()
109+
110+
setTime := time.Date(2024, 11, 11, 8, 30, 10, 0, time.UTC)
111+
dummyMetricStore := utilmetric.NewMetricStore()
112+
dummyMetricStore.SetNodeMetric("total.power.used.watts", utilmetric.MetricData{
113+
Value: 0,
114+
Time: &setTime,
115+
})
116+
117+
m := NewMetricStorePowerReader(dummyMetricStore).(*metricStorePowerReader)
118+
now := time.Date(2024, 11, 11, 8, 30, 11, 0, time.UTC)
119+
_, err := m.get(context.TODO(), now)
120+
assert.Error(t, err)
121+
}

0 commit comments

Comments
 (0)