diff --git a/python/kubeflow/trainer/api/trainer_client_test.py b/python/kubeflow/trainer/api/trainer_client_test.py index e3f56c69..9c3dbb39 100644 --- a/python/kubeflow/trainer/api/trainer_client_test.py +++ b/python/kubeflow/trainer/api/trainer_client_test.py @@ -450,9 +450,6 @@ def create_cluster_training_runtime( metadata=models.IoK8sApimachineryPkgApisMetaV1ObjectMeta( name=name, namespace=namespace, - labels={ - "trainer.kubeflow.org/accelerator": "gpu-tesla-v100-16gb", - }, ), spec=models.TrainerV1alpha1TrainingRuntimeSpec( mlPolicy=models.TrainerV1alpha1MLPolicy( @@ -514,7 +511,6 @@ def create_runtime_type( trainer_type=types.TrainerType.CUSTOM_TRAINER, framework=types.Framework.TORCH, entrypoint=[constants.TORCH_ENTRYPOINT], - accelerator="gpu-tesla-v100-16gb", accelerator_count=4, ), ) @@ -541,7 +537,6 @@ def get_train_job_data_type( trainer_type=types.TrainerType.CUSTOM_TRAINER, framework=types.Framework.TORCH, entrypoint=["torchrun"], - accelerator="gpu-tesla-v100-16gb", accelerator_count=4, ), ), diff --git a/python/kubeflow/trainer/constants/constants.py b/python/kubeflow/trainer/constants/constants.py index fbcd24c1..d21da279 100644 --- a/python/kubeflow/trainer/constants/constants.py +++ b/python/kubeflow/trainer/constants/constants.py @@ -72,10 +72,6 @@ # single VM where distributed training code is executed. NODE = "node" -# The label key to identify the accelerator type for model training (e.g. GPU-Tesla-V100-16GB). -# TODO: Potentially, we should take this from the Node selectors. -ACCELERATOR_LABEL = "trainer.kubeflow.org/accelerator" - # Unknown indicates that the value can't be identified. UNKNOWN = "Unknown" diff --git a/python/kubeflow/trainer/types/types.py b/python/kubeflow/trainer/types/types.py index d3390957..0d9c5ed9 100644 --- a/python/kubeflow/trainer/types/types.py +++ b/python/kubeflow/trainer/types/types.py @@ -167,7 +167,6 @@ class Trainer: trainer_type: TrainerType framework: Framework entrypoint: Optional[List[str]] = None - accelerator: str = constants.UNKNOWN accelerator_count: Union[str, float, int] = constants.UNKNOWN diff --git a/python/kubeflow/trainer/utils/utils.py b/python/kubeflow/trainer/utils/utils.py index 4225bb8a..85783732 100644 --- a/python/kubeflow/trainer/utils/utils.py +++ b/python/kubeflow/trainer/utils/utils.py @@ -140,14 +140,6 @@ def get_runtime_trainer( if isinstance(trainer.accelerator_count, (int, float)) and ml_policy.num_nodes: trainer.accelerator_count *= ml_policy.num_nodes - # TODO (andreyvelich): Currently, we get the accelerator type from - # the runtime labels. - if ( - runtime_metadata.labels - and constants.ACCELERATOR_LABEL in runtime_metadata.labels - ): - trainer.accelerator = runtime_metadata.labels[constants.ACCELERATOR_LABEL] - return trainer