Skip to content

Commit 3c445fa

Browse files
authored
Merge pull request #797 from lihonghao314/dev/borwein-log
feat(sysadvisor): remove borwein v1 model
2 parents 97eeaa9 + 4e37143 commit 3c445fa

File tree

11 files changed

+130
-882
lines changed

11 files changed

+130
-882
lines changed

cmd/katalyst-agent/app/options/sysadvisor/qosaware/model/borwein/borwein.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
)
2727

2828
type BorweinOptions struct {
29-
InferenceServiceSocketAbsPath string
3029
ModelNameToInferenceSvcSockAbsPath map[string]string
3130
FeatureDescriptionFilePath string
3231
NodeFeatureNames []string
@@ -43,8 +42,6 @@ func NewBorweinOptions() *BorweinOptions {
4342

4443
// AddFlags adds flags to the specified FlagSet.
4544
func (o *BorweinOptions) AddFlags(fs *pflag.FlagSet) {
46-
fs.StringVar(&o.InferenceServiceSocketAbsPath, "borwein-inference-svc-socket-path", o.InferenceServiceSocketAbsPath,
47-
"socket path which borwein inference server listens at. it's deprecated, use borwein-inference-model-to-svc-socket-path intead")
4845
fs.StringToStringVar(&o.ModelNameToInferenceSvcSockAbsPath, "borwein-inference-model-to-svc-socket-path", o.ModelNameToInferenceSvcSockAbsPath,
4946
"model name to socket path which its borwein inference server listens at")
5047
fs.StringVar(&o.FeatureDescriptionFilePath, "feature-description-filepath", o.FeatureDescriptionFilePath,
@@ -63,7 +60,6 @@ func (o *BorweinOptions) ApplyTo(c *borwein.BorweinConfiguration) error {
6360
ContainerFeatureNames []string `json:"container_feature_names"`
6461
}{}
6562

66-
c.InferenceServiceSocketAbsPath = o.InferenceServiceSocketAbsPath
6763
c.ModelNameToInferenceSvcSockAbsPath = o.ModelNameToInferenceSvcSockAbsPath
6864

6965
if len(o.NodeFeatureNames)+len(o.ContainerFeatureNames) > 0 {

pkg/agent/sysadvisor/metacache/metacache_test.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2828

2929
borweinconsts "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/consts"
30+
borweinutils "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/utils"
3031
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types"
3132
"github.com/kubewharf/katalyst-core/pkg/metrics"
3233
)
@@ -53,11 +54,11 @@ func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) {
5354
fields: fields{
5455
emitter: metrics.DummyMetrics{},
5556
modelToResult: map[string]interface{}{
56-
borweinconsts.ModelNameBorwein: []int{1, 2, 3},
57+
borweinutils.GetInferenceResultKey(borweinconsts.ModelNameBorweinLatencyRegression): []int{1, 2, 3},
5758
},
5859
},
5960
args: args{
60-
modelName: borweinconsts.ModelNameBorwein,
61+
modelName: borweinconsts.ModelNameBorweinLatencyRegression,
6162
},
6263
want: []int{1, 2, 3},
6364
wantErr: false,
@@ -67,7 +68,7 @@ func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) {
6768
fields: fields{
6869
emitter: metrics.DummyMetrics{},
6970
modelToResult: map[string]interface{}{
70-
borweinconsts.ModelNameBorwein: []int{1, 2, 3},
71+
borweinutils.GetInferenceResultKey(borweinconsts.ModelNameBorweinLatencyRegression): []int{1, 2, 3},
7172
},
7273
},
7374
args: args{
@@ -88,7 +89,7 @@ func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) {
8889

8990
return filteredResult, nil
9091
},
91-
modelName: borweinconsts.ModelNameBorwein,
92+
modelName: borweinconsts.ModelNameBorweinLatencyRegression,
9293
},
9394
want: []int{1, 2},
9495
wantErr: false,
@@ -98,7 +99,7 @@ func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) {
9899
fields: fields{
99100
emitter: metrics.DummyMetrics{},
100101
modelToResult: map[string]interface{}{
101-
borweinconsts.ModelNameBorwein: []string{"1", "2", "3"},
102+
borweinutils.GetInferenceResultKey(borweinconsts.ModelNameBorweinLatencyRegression): []string{"1", "2", "3"},
102103
},
103104
},
104105
args: args{
@@ -119,7 +120,7 @@ func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) {
119120

120121
return filteredResult, nil
121122
},
122-
modelName: borweinconsts.ModelNameBorwein,
123+
modelName: borweinconsts.ModelNameBorweinLatencyRegression,
123124
},
124125
want: nil,
125126
wantErr: true,
@@ -133,7 +134,7 @@ func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) {
133134
emitter: tt.fields.emitter,
134135
modelToResult: tt.fields.modelToResult,
135136
}
136-
got, err := mc.GetFilteredInferenceResult(tt.args.filterFunc, tt.args.modelName)
137+
got, err := mc.GetFilteredInferenceResult(tt.args.filterFunc, borweinutils.GetInferenceResultKey(tt.args.modelName))
137138
if (err != nil) != tt.wantErr {
138139
t.Errorf("MetaCacheImp.GetFilteredInferenceResult() error = %v, wantErr %v", err, tt.wantErr)
139140
return
@@ -166,11 +167,11 @@ func TestMetaCacheImp_GetInferenceResult(t *testing.T) {
166167
fields: fields{
167168
emitter: metrics.DummyMetrics{},
168169
modelToResult: map[string]interface{}{
169-
borweinconsts.ModelNameBorwein: []int{1, 2, 3},
170+
borweinconsts.ModelNameBorweinLatencyRegression: []int{1, 2, 3},
170171
},
171172
},
172173
args: args{
173-
modelName: borweinconsts.ModelNameBorwein,
174+
modelName: borweinconsts.ModelNameBorweinLatencyRegression,
174175
},
175176
want: []int{1, 2, 3},
176177
wantErr: false,
@@ -221,7 +222,7 @@ func TestMetaCacheImp_SetInferenceResult(t *testing.T) {
221222
modelToResult: make(map[string]interface{}),
222223
},
223224
args: args{
224-
modelName: borweinconsts.ModelNameBorwein,
225+
modelName: borweinconsts.ModelNameBorweinLatencyRegression,
225226
result: []int{1, 2, 3},
226227
},
227228
want: []int{1, 2, 3},

pkg/agent/sysadvisor/plugin/inference/modelresultfetcher/borwein/borwein.go

Lines changed: 33 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232

3333
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
3434
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/modelresultfetcher"
35-
borweinconsts "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/consts"
3635
borweininfsvc "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/inferencesvc"
3736
borweintypes "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/types"
3837
borweinutils "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/utils"
@@ -62,14 +61,12 @@ type BorweinModelResultFetcher struct {
6261
name string
6362
qosConfig *generic.QoSConfiguration
6463

65-
nodeFeatureNames []string // handled by GetNodeFeature
66-
containerFeatureNames []string // handled by GetContainerFeature
67-
inferenceServiceSocketAbsPath string
64+
nodeFeatureNames []string // handled by GetNodeFeature
65+
containerFeatureNames []string // handled by GetContainerFeature
6866
modelNameToInferenceSvcSockAbsPath map[string]string // map modelName to inference server sock path
6967

7068
emitter metrics.MetricEmitter
7169

72-
infSvcClient borweininfsvc.InferenceServiceClient
7370
modelNameToInferenceSvcClient map[string]borweininfsvc.InferenceServiceClient // map modelName to its inference client
7471
clientLock sync.RWMutex
7572
}
@@ -137,7 +134,7 @@ func (bmrf *BorweinModelResultFetcher) FetchModelResult(ctx context.Context, met
137134
metaWriter metacache.MetaWriter, metaServer *metaserver.MetaServer,
138135
) error {
139136
bmrf.clientLock.RLock()
140-
if bmrf.infSvcClient == nil && len(bmrf.modelNameToInferenceSvcClient) == 0 {
137+
if len(bmrf.modelNameToInferenceSvcClient) == 0 {
141138
bmrf.clientLock.RUnlock()
142139
return fmt.Errorf("infSvcClient isn't initialized")
143140
}
@@ -175,16 +172,7 @@ func (bmrf *BorweinModelResultFetcher) FetchModelResult(ctx context.Context, met
175172
return fmt.Errorf("getInferenceRequestForPods failed with error: %v", err)
176173
}
177174

178-
bmrf.clientLock.RLock()
179-
var infSvcClients map[string]borweininfsvc.InferenceServiceClient
180-
if len(bmrf.modelNameToInferenceSvcClient) > 0 {
181-
infSvcClients = bmrf.modelNameToInferenceSvcClient
182-
} else {
183-
infSvcClients = map[string]borweininfsvc.InferenceServiceClient{
184-
borweinconsts.ModelNameBorwein: bmrf.infSvcClient,
185-
}
186-
}
187-
bmrf.clientLock.RUnlock()
175+
infSvcClients := bmrf.modelNameToInferenceSvcClient
188176

189177
errCh := make(chan error, len(infSvcClients))
190178
for modelName, client := range infSvcClients {
@@ -380,57 +368,44 @@ func (bmrf *BorweinModelResultFetcher) initInferenceSvcClientConn() (bool, error
380368
// todo: emit metrics when initializing client connection failed
381369

382370
// never success
383-
if bmrf.inferenceServiceSocketAbsPath == "" && len(bmrf.modelNameToInferenceSvcSockAbsPath) == 0 {
371+
if len(bmrf.modelNameToInferenceSvcSockAbsPath) == 0 {
384372
return false, fmt.Errorf("empty inference service socks information")
385373
}
386374

387-
if len(bmrf.modelNameToInferenceSvcSockAbsPath) > 0 {
388-
modelNameToConn := make(map[string]*grpc.ClientConn, len(bmrf.modelNameToInferenceSvcSockAbsPath))
389-
390-
allSuccess := true
391-
for modelName, sockAbsPath := range bmrf.modelNameToInferenceSvcSockAbsPath {
392-
infSvcConn, err := process.Dial(sockAbsPath, 5*time.Second)
393-
if err != nil {
394-
general.Errorf("get inference svc connection with socket: %s for model: %s failed with error",
395-
sockAbsPath, modelName)
396-
allSuccess = false
397-
break
398-
}
399-
general.Infof("init inference svc connection with socket: %s for model: %s success", sockAbsPath, modelName)
400-
401-
modelNameToConn[modelName] = infSvcConn
402-
}
375+
modelNameToConn := make(map[string]*grpc.ClientConn, len(bmrf.modelNameToInferenceSvcSockAbsPath))
403376

404-
if !allSuccess {
405-
for modelName, conn := range modelNameToConn {
406-
err := conn.Close()
407-
if err != nil {
408-
general.Errorf("close connection for model: %s failed with error: %v",
409-
modelName, err)
410-
}
411-
}
412-
} else {
413-
bmrf.clientLock.Lock()
414-
bmrf.modelNameToInferenceSvcClient = make(map[string]borweininfsvc.InferenceServiceClient, len(modelNameToConn))
415-
for modelName, conn := range modelNameToConn {
416-
bmrf.modelNameToInferenceSvcClient[modelName] = borweininfsvc.NewInferenceServiceClient(conn)
417-
}
418-
bmrf.clientLock.Unlock()
377+
allSuccess := true
378+
for modelName, sockAbsPath := range bmrf.modelNameToInferenceSvcSockAbsPath {
379+
infSvcConn, err := process.Dial(sockAbsPath, 5*time.Second)
380+
if err != nil {
381+
general.Errorf("get inference svc connection with socket: %s for model: %s failed with error",
382+
sockAbsPath, modelName)
383+
allSuccess = false
384+
break
419385
}
386+
general.Infof("init inference svc connection with socket: %s for model: %s success", sockAbsPath, modelName)
420387

421-
return allSuccess, nil
388+
modelNameToConn[modelName] = infSvcConn
422389
}
423390

424-
infSvcConn, err := process.Dial(bmrf.inferenceServiceSocketAbsPath, 5*time.Second)
425-
if err != nil {
426-
general.Errorf("get inference svc connection with socket: %s failed with error: %v", bmrf.inferenceServiceSocketAbsPath, err)
427-
return false, nil
391+
if !allSuccess {
392+
for modelName, conn := range modelNameToConn {
393+
err := conn.Close()
394+
if err != nil {
395+
general.Errorf("close connection for model: %s failed with error: %v",
396+
modelName, err)
397+
}
398+
}
399+
} else {
400+
bmrf.clientLock.Lock()
401+
bmrf.modelNameToInferenceSvcClient = make(map[string]borweininfsvc.InferenceServiceClient, len(modelNameToConn))
402+
for modelName, conn := range modelNameToConn {
403+
bmrf.modelNameToInferenceSvcClient[modelName] = borweininfsvc.NewInferenceServiceClient(conn)
404+
}
405+
bmrf.clientLock.Unlock()
428406
}
429407

430-
bmrf.clientLock.Lock()
431-
bmrf.infSvcClient = borweininfsvc.NewInferenceServiceClient(infSvcConn)
432-
bmrf.clientLock.Unlock()
433-
return true, nil
408+
return allSuccess, nil
434409
}
435410

436411
func NewBorweinModelResultFetcher(fetcherName string, conf *config.Configuration, extraConf interface{},
@@ -455,11 +430,10 @@ func NewBorweinModelResultFetcher(fetcherName string, conf *config.Configuration
455430
qosConfig: conf.QoSConfiguration,
456431
nodeFeatureNames: conf.BorweinConfiguration.NodeFeatureNames,
457432
containerFeatureNames: conf.BorweinConfiguration.ContainerFeatureNames,
458-
inferenceServiceSocketAbsPath: conf.BorweinConfiguration.InferenceServiceSocketAbsPath,
459433
modelNameToInferenceSvcSockAbsPath: conf.BorweinConfiguration.ModelNameToInferenceSvcSockAbsPath,
460434
}
461435

462-
// fetcher initializing doesn't block sys-adviosr main process
436+
// fetcher initializing doesn't block sys-advisor main process
463437
go func() {
464438
err := wait.PollImmediateInfinite(5*time.Second, bmrf.initInferenceSvcClientConn)
465439
if err != nil {

0 commit comments

Comments
 (0)