-
Notifications
You must be signed in to change notification settings - Fork 834
feat(api): Sync TrainJob JobsStatus from JobSet ReplicatedJobsStatus #2802
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
223f1b5
783630d
bfe4234
a1287d3
efc5e9d
ab46fa6
fc55beb
7d34b6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,6 @@ import ( | |
"context" | ||
"errors" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/util/validation/field" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
"sigs.k8s.io/controller-runtime/pkg/webhook/admission" | ||
|
@@ -32,7 +31,7 @@ import ( | |
index "github.com/kubeflow/trainer/v2/pkg/runtime/indexer" | ||
) | ||
|
||
var errorTooManyTerminalConditionPlugin = errors.New("too many TerminalCondition plugins are registered") | ||
var errorTooManyTrainJobStatusPlugin = errors.New("too many TrainJobStatus plugins are registered") | ||
|
||
type Framework struct { | ||
registry fwkplugins.Registry | ||
|
@@ -43,7 +42,7 @@ type Framework struct { | |
watchExtensionPlugins []framework.WatchExtensionPlugin | ||
podNetworkPlugins []framework.PodNetworkPlugin | ||
componentBuilderPlugins []framework.ComponentBuilderPlugin | ||
terminalConditionPlugins []framework.TerminalConditionPlugin | ||
trainJobStatusPlugin framework.TrainJobStatusPlugin | ||
} | ||
|
||
func New(ctx context.Context, c client.Client, r fwkplugins.Registry, indexer client.FieldIndexer) (*Framework, error) { | ||
|
@@ -79,8 +78,11 @@ func New(ctx context.Context, c client.Client, r fwkplugins.Registry, indexer cl | |
if p, ok := plugin.(framework.ComponentBuilderPlugin); ok { | ||
f.componentBuilderPlugins = append(f.componentBuilderPlugins, p) | ||
} | ||
if p, ok := plugin.(framework.TerminalConditionPlugin); ok { | ||
f.terminalConditionPlugins = append(f.terminalConditionPlugins, p) | ||
if p, ok := plugin.(framework.TrainJobStatusPlugin); ok { | ||
if f.trainJobStatusPlugin != nil { | ||
return nil, errorTooManyTrainJobStatusPlugin | ||
} | ||
f.trainJobStatusPlugin = p | ||
} | ||
} | ||
f.plugins = plugins | ||
|
@@ -141,13 +143,9 @@ func (f *Framework) RunComponentBuilderPlugins(ctx context.Context, info *runtim | |
return objs, nil | ||
} | ||
|
||
func (f *Framework) RunTerminalConditionPlugins(ctx context.Context, trainJob *trainer.TrainJob) (*metav1.Condition, error) { | ||
// TODO (tenzen-y): Once we provide the Configuration API, we should validate which plugin should have terminalCondition execution points. | ||
if len(f.terminalConditionPlugins) > 1 { | ||
Comment on lines
-145
to
-146
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason why we can remove the verification of the number of plugins? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we discussed here, users won't be able to register more than one plugin for terminal condition: #2802 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the check is now moved at initialization, rather than at (first) execution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, right. The new way makes more sense. |
||
return nil, errorTooManyTerminalConditionPlugin | ||
} | ||
if len(f.terminalConditionPlugins) != 0 { | ||
return f.terminalConditionPlugins[0].TerminalCondition(ctx, trainJob) | ||
func (f *Framework) RunTrainJobStatusPlugin(ctx context.Context, trainJob *trainer.TrainJob) (*trainer.TrainJobStatus, error) { | ||
if f.trainJobStatusPlugin != nil { | ||
return f.trainJobStatusPlugin.Status(ctx, trainJob) | ||
} | ||
return nil, nil | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.