From 393a0be60d9ce85080986fc9b9e2960b4113a001 Mon Sep 17 00:00:00 2001 From: sailesh duddupudi Date: Tue, 18 Mar 2025 18:58:02 +0000 Subject: [PATCH 1/5] Update Manifest Images to GHCR Signed-off-by: sailesh duddupudi --- manifests/overlays/kubeflow/kustomization.yaml | 4 ++-- manifests/overlays/standalone/kustomization.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/manifests/overlays/kubeflow/kustomization.yaml b/manifests/overlays/kubeflow/kustomization.yaml index 7a343ac6f4..9f2301c8c5 100644 --- a/manifests/overlays/kubeflow/kustomization.yaml +++ b/manifests/overlays/kubeflow/kustomization.yaml @@ -5,8 +5,8 @@ resources: - ../../base - kubeflow-training-roles.yaml images: - - name: kubeflow/training-operator - newTag: v1-5170a36 + - name: ghcr.io/kubeflow/training/training-operator + newTag: v1-f654b1e # TODO (tenzen-y): Once we support cert-manager, we need to remove this secret generation. # REF: https://github.com/kubeflow/training-operator/issues/2049 secretGenerator: diff --git a/manifests/overlays/standalone/kustomization.yaml b/manifests/overlays/standalone/kustomization.yaml index 0e01f4a8e9..8bf594b784 100644 --- a/manifests/overlays/standalone/kustomization.yaml +++ b/manifests/overlays/standalone/kustomization.yaml @@ -5,8 +5,8 @@ resources: - ../../base - namespace.yaml images: - - name: kubeflow/training-operator - newTag: v1-5170a36 + - name: ghcr.io/kubeflow/training/training-operator + newTag: v1-f654b1e secretGenerator: - name: training-operator-webhook-cert options: From 9a2f5005afdb54335d69232fc2e3e1bc99d05c33 Mon Sep 17 00:00:00 2001 From: sailesh duddupudi Date: Tue, 18 Mar 2025 19:43:21 +0000 Subject: [PATCH 2/5] Update examples, SDK and pkg images to GHCR Signed-off-by: sailesh duddupudi --- Makefile | 2 +- examples/jax/cpu-demo/demo.yaml | 2 +- .../jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml | 2 +- examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml | 4 ++-- examples/pytorch/elastic/echo/echo.yaml | 2 +- examples/pytorch/elastic/imagenet/imagenet.yaml | 2 +- .../pytorch/image-classification/create-pytorchjob.ipynb | 2 +- examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml | 4 ++-- examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml | 4 ++-- examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml | 4 ++-- examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml | 4 ++-- examples/tensorflow/dist-mnist/tf_job_mnist.yaml | 6 +++--- .../distribution_strategy/multi_worker_tfjob.yaml | 2 +- examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml | 2 +- examples/tensorflow/simple.yaml | 2 +- .../lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml | 4 ++-- examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml | 4 ++-- .../xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml | 4 ++-- .../xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml | 4 ++-- .../xgboost-dist/xgboostjob_v1_iris_predict_local.yaml | 4 ++-- examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml | 4 ++-- .../xgboost-dist/xgboostjob_v1_iris_train_local.yaml | 4 ++-- examples/xgboost/xgboostjob.yaml | 4 ++-- pkg/config/config.go | 2 +- sdk/python/kubeflow/training/constants/constants.py | 6 +++--- 25 files changed, 42 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index eeed3843ae..d9908ac512 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Image URL to use all building/pushing image targets -IMG ?= kubeflow/training-operator:latest +IMG ?= ghcr.io/kubeflow/training/training-operator:latest # CRD generation options CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true,maxDescLen=400" diff --git a/examples/jax/cpu-demo/demo.yaml b/examples/jax/cpu-demo/demo.yaml index bffd3cc16f..a5adef5b36 100644 --- a/examples/jax/cpu-demo/demo.yaml +++ b/examples/jax/cpu-demo/demo.yaml @@ -12,7 +12,7 @@ spec: spec: containers: - name: jax - image: docker.io/kubeflow/jaxjob-simple:latest + image: ghcr.io/kubeflow/training/jaxjob-simple:latest command: - "python3" - "train.py" diff --git a/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml b/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml index e124b2efef..ce8b1497ad 100644 --- a/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml +++ b/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml @@ -12,5 +12,5 @@ spec: spec: containers: - name: jax - image: docker.io/kubeflow/jaxjob-dist-spmd-mnist:latest + image: ghcr.io/kubeflow/training/jaxjob-dist-spmd-mnist:latest imagePullPolicy: Always diff --git a/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml b/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml index 25a07e61c3..fd68001e83 100644 --- a/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml +++ b/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-deepspeed-demo:latest + image: ghcr.io/kubeflow/training/pytorch-deepspeed-demo:latest command: - torchrun - /train_bert_ds.py @@ -27,7 +27,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-deepspeed-demo:latest + image: ghcr.io/kubeflow/training/pytorch-deepspeed-demo:latest command: - torchrun - /train_bert_ds.py diff --git a/examples/pytorch/elastic/echo/echo.yaml b/examples/pytorch/elastic/echo/echo.yaml index ee51ec8418..46344f5ef8 100644 --- a/examples/pytorch/elastic/echo/echo.yaml +++ b/examples/pytorch/elastic/echo/echo.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-elastic-example-echo:latest + image: ghcr.io/kubeflow/training/pytorch-elastic-example-echo:latest imagePullPolicy: IfNotPresent env: - name: LOGLEVEL diff --git a/examples/pytorch/elastic/imagenet/imagenet.yaml b/examples/pytorch/elastic/imagenet/imagenet.yaml index b52b2c2594..c2eb1de1b1 100644 --- a/examples/pytorch/elastic/imagenet/imagenet.yaml +++ b/examples/pytorch/elastic/imagenet/imagenet.yaml @@ -23,7 +23,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-elastic-example-imagenet:latest + image: ghcr.io/kubeflow/training/pytorch-elastic-example-imagenet:latest imagePullPolicy: IfNotPresent resources: requests: diff --git a/examples/pytorch/image-classification/create-pytorchjob.ipynb b/examples/pytorch/image-classification/create-pytorchjob.ipynb index f6cf4f9f67..b5d809bb3c 100644 --- a/examples/pytorch/image-classification/create-pytorchjob.ipynb +++ b/examples/pytorch/image-classification/create-pytorchjob.ipynb @@ -121,7 +121,7 @@ "\n", "container = V1Container(\n", " name=container_name,\n", - " image=\"kubeflow/pytorch-dist-mnist:latest\",\n", + " image=\"ghcr.io/kubeflow/training/pytorch-dist-mnist:latest\",\n", " args=[\"--backend\", \"gloo\"],\n", ")\n", "\n", diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml index 3e42a2685f..f65f74c88a 100644 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml +++ b/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest args: ["--backend", "gloo"] # Comment out the below resources to use the CPU. resources: @@ -24,7 +24,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest args: ["--backend", "gloo"] # Comment out the below resources to use the CPU. resources: diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml index 53b8da80ea..584f7bf618 100644 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml +++ b/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest args: ["--backend", "mpi"] # Comment out the below resources to use the CPU. resources: @@ -24,7 +24,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest args: ["--backend", "mpi"] # Comment out the below resources to use the CPU. resources: diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml index 0807abe32f..b28943c781 100644 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml +++ b/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest args: ["--backend", "nccl"] resources: limits: @@ -23,7 +23,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest args: ["--backend", "nccl"] resources: limits: diff --git a/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml b/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml index fc19bc210e..e59c56db93 100644 --- a/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml +++ b/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-sendrecv-test:latest + image: ghcr.io/kubeflow/training/pytorch-dist-sendrecv-test:latest Worker: replicas: 3 restartPolicy: OnFailure @@ -19,4 +19,4 @@ spec: spec: containers: - name: pytorch - image: kubeflow/pytorch-dist-sendrecv-test:latest + image: ghcr.io/kubeflow/training/pytorch-dist-sendrecv-test:latest diff --git a/examples/tensorflow/dist-mnist/tf_job_mnist.yaml b/examples/tensorflow/dist-mnist/tf_job_mnist.yaml index c97d03b700..91f22720a6 100644 --- a/examples/tensorflow/dist-mnist/tf_job_mnist.yaml +++ b/examples/tensorflow/dist-mnist/tf_job_mnist.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: tensorflow - image: kubeflow/tf-dist-mnist-test:latest + image: ghcr.io/kubeflow/training/tf-dist-mnist-test:latest PS: replicas: 1 @@ -20,7 +20,7 @@ spec: spec: containers: - name: tensorflow - image: kubeflow/tf-dist-mnist-test:latest + image: ghcr.io/kubeflow/training/tf-dist-mnist-test:latest Worker: replicas: 2 @@ -29,4 +29,4 @@ spec: spec: containers: - name: tensorflow - image: kubeflow/tf-dist-mnist-test:latest + image: ghcr.io/kubeflow/training/tf-dist-mnist-test:latest diff --git a/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml b/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml index b9500f2d5e..2b448ebdfb 100644 --- a/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml +++ b/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: tensorflow - image: kubeflow/tf-multi-worker-strategy:latest + image: ghcr.io/kubeflow/training/tf-multi-worker-strategy:latest volumeMounts: - mountPath: /train name: training diff --git a/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml b/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml index 3c1884a05c..a1a5487187 100644 --- a/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml +++ b/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml @@ -14,7 +14,7 @@ spec: spec: containers: - name: tensorflow - image: kubeflow/tf-mnist-with-summaries:latest + image: ghcr.io/kubeflow/training/tf-mnist-with-summaries:latest command: - "python" - "/var/tf_mnist/mnist_with_summaries.py" diff --git a/examples/tensorflow/simple.yaml b/examples/tensorflow/simple.yaml index 4c2a0a76e6..ce4d7c1f49 100644 --- a/examples/tensorflow/simple.yaml +++ b/examples/tensorflow/simple.yaml @@ -12,7 +12,7 @@ spec: spec: containers: - name: tensorflow - image: kubeflow/tf-mnist-with-summaries:latest + image: ghcr.io/kubeflow/training/tf-mnist-with-summaries:latest command: - "python" - "/var/tf_mnist/mnist_with_summaries.py" diff --git a/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml b/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml index 35487b1511..1c26d6c141 100644 --- a/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml +++ b/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: kubeflow/lightgbm-dist-py-test:1.0 + image: ghcr.io/kubeflow/training/lightgbm-dist-py-test:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -45,7 +45,7 @@ spec: spec: containers: - name: xgboost - image: kubeflow/lightgbm-dist-py-test:1.0 + image: ghcr.io/kubeflow/training/lightgbm-dist-py-test:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml b/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml index 80ce3adce1..cf03e97937 100644 --- a/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml +++ b/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -23,7 +23,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml b/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml index 5364eb07cd..cb08fcf36d 100644 --- a/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml +++ b/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -27,7 +27,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml index 9f0c773b1f..6751f3979f 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -28,7 +28,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml index 168de9971d..4eda778bef 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml @@ -15,7 +15,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model @@ -38,7 +38,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml index 5824ec056d..e0bc8579be 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -30,7 +30,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml index 2798587bb2..b160acc12d 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml @@ -15,7 +15,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model @@ -41,7 +41,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model diff --git a/examples/xgboost/xgboostjob.yaml b/examples/xgboost/xgboostjob.yaml index 5824ec056d..e0bc8579be 100644 --- a/examples/xgboost/xgboostjob.yaml +++ b/examples/xgboost/xgboostjob.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -30,7 +30,7 @@ spec: spec: containers: - name: xgboost - image: docker.io/kubeflow/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/pkg/config/config.go b/pkg/config/config.go index 68461998f2..df3ee9c474 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -32,5 +32,5 @@ const ( // PyTorchInitContainerMaxTriesDefault is the default number of tries for the pytorch init container. PyTorchInitContainerMaxTriesDefault = 100 // MPIKubectlDeliveryImageDefault is the default image for launcher pod in MPIJob init container. - MPIKubectlDeliveryImageDefault = "kubeflow/kubectl-delivery:latest" + MPIKubectlDeliveryImageDefault = "ghcr.io/kubeflow/training/kubectl-delivery:latest" ) diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 2a5415ea26..9a9fb89f52 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -84,7 +84,7 @@ # TODO (andreyvelich): We should add image tag for Storage Initializer and Trainer. STORAGE_INITIALIZER_IMAGE = os.getenv( - "STORAGE_INITIALIZER_IMAGE", "docker.io/kubeflow/storage-initializer" + "STORAGE_INITIALIZER_IMAGE", "ghcr.io/kubeflow/training/storage-initializer" ) STORAGE_INITIALIZER_VOLUME_MOUNT = models.V1VolumeMount( @@ -93,7 +93,7 @@ ) TRAINER_TRANSFORMER_IMAGE = os.getenv( - "TRAINER_TRANSFORMER_IMAGE", "docker.io/kubeflow/trainer-huggingface" + "TRAINER_TRANSFORMER_IMAGE", "ghcr.io/kubeflow/training/trainer-huggingface" ) # TFJob constants. @@ -153,7 +153,7 @@ JAXJOB_PLURAL = "jaxjobs" JAXJOB_CONTAINER = "jax" JAXJOB_REPLICA_TYPES = REPLICA_TYPE_WORKER.lower() -JAXJOB_BASE_IMAGE = "docker.io/kubeflow/jaxjob-dist-spmd-mnist:latest" +JAXJOB_BASE_IMAGE = "ghcr.io/kubeflow/training/jaxjob-dist-spmd-mnist:latest" # Dictionary to get plural, model, and container for each Job kind. JOB_PARAMETERS = { From f6294f05fddcfc5a5be2616dc15281b0ec0ee447 Mon Sep 17 00:00:00 2001 From: sailesh duddupudi Date: Wed, 19 Mar 2025 15:22:32 +0000 Subject: [PATCH 3/5] revert trainer to training in ghcr prefix Signed-off-by: sailesh duddupudi --- .github/workflows/build-and-publish-images.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-publish-images.yaml b/.github/workflows/build-and-publish-images.yaml index 65db0af82d..3a5640e7e0 100644 --- a/.github/workflows/build-and-publish-images.yaml +++ b/.github/workflows/build-and-publish-images.yaml @@ -75,7 +75,7 @@ jobs: with: image: | docker.io/kubeflow/${{ inputs.component-name }} - ghcr.io/kubeflow/trainer/${{ inputs.component-name }} + ghcr.io/kubeflow/training/${{ inputs.component-name }} dockerfile: ${{ inputs.dockerfile }} platforms: ${{ inputs.platforms }} context: ${{ inputs.context }} @@ -88,7 +88,7 @@ jobs: with: image: | docker.io/kubeflow/${{ inputs.component-name }} - ghcr.io/kubeflow/trainer/${{ inputs.component-name }} + ghcr.io/kubeflow/training/${{ inputs.component-name }} dockerfile: ${{ inputs.dockerfile }} platforms: ${{ inputs.platforms }} context: ${{ inputs.context }} From 68838a836b46e632524f77d4f73c9dd83e7e8a7d Mon Sep 17 00:00:00 2001 From: sailesh duddupudi Date: Thu, 20 Mar 2025 14:19:27 +0000 Subject: [PATCH 4/5] update prefix Signed-off-by: sailesh duddupudi --- .github/workflows/build-and-publish-images.yaml | 4 ++-- Makefile | 2 +- examples/jax/cpu-demo/demo.yaml | 2 +- .../jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml | 2 +- examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml | 4 ++-- examples/pytorch/elastic/echo/echo.yaml | 2 +- examples/pytorch/elastic/imagenet/imagenet.yaml | 2 +- .../pytorch/image-classification/create-pytorchjob.ipynb | 2 +- examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml | 4 ++-- examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml | 4 ++-- examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml | 4 ++-- examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml | 4 ++-- examples/tensorflow/dist-mnist/tf_job_mnist.yaml | 6 +++--- .../distribution_strategy/multi_worker_tfjob.yaml | 2 +- examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml | 2 +- examples/tensorflow/simple.yaml | 2 +- .../lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml | 4 ++-- examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml | 4 ++-- .../xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml | 4 ++-- .../xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml | 4 ++-- .../xgboost-dist/xgboostjob_v1_iris_predict_local.yaml | 4 ++-- examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml | 4 ++-- .../xgboost-dist/xgboostjob_v1_iris_train_local.yaml | 4 ++-- examples/xgboost/xgboostjob.yaml | 4 ++-- manifests/overlays/kubeflow/kustomization.yaml | 2 +- manifests/overlays/standalone/kustomization.yaml | 2 +- pkg/config/config.go | 2 +- sdk/python/kubeflow/training/constants/constants.py | 6 +++--- 28 files changed, 46 insertions(+), 46 deletions(-) diff --git a/.github/workflows/build-and-publish-images.yaml b/.github/workflows/build-and-publish-images.yaml index 3a5640e7e0..a4ca1944d2 100644 --- a/.github/workflows/build-and-publish-images.yaml +++ b/.github/workflows/build-and-publish-images.yaml @@ -75,7 +75,7 @@ jobs: with: image: | docker.io/kubeflow/${{ inputs.component-name }} - ghcr.io/kubeflow/training/${{ inputs.component-name }} + ghcr.io/kubeflow/training-v1/${{ inputs.component-name }} dockerfile: ${{ inputs.dockerfile }} platforms: ${{ inputs.platforms }} context: ${{ inputs.context }} @@ -88,7 +88,7 @@ jobs: with: image: | docker.io/kubeflow/${{ inputs.component-name }} - ghcr.io/kubeflow/training/${{ inputs.component-name }} + ghcr.io/kubeflow/training-v1/${{ inputs.component-name }} dockerfile: ${{ inputs.dockerfile }} platforms: ${{ inputs.platforms }} context: ${{ inputs.context }} diff --git a/Makefile b/Makefile index d9908ac512..a85c80fadc 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Image URL to use all building/pushing image targets -IMG ?= ghcr.io/kubeflow/training/training-operator:latest +IMG ?= ghcr.io/kubeflow/training-v1/training-operator:latest # CRD generation options CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true,maxDescLen=400" diff --git a/examples/jax/cpu-demo/demo.yaml b/examples/jax/cpu-demo/demo.yaml index a5adef5b36..2c7bdc8086 100644 --- a/examples/jax/cpu-demo/demo.yaml +++ b/examples/jax/cpu-demo/demo.yaml @@ -12,7 +12,7 @@ spec: spec: containers: - name: jax - image: ghcr.io/kubeflow/training/jaxjob-simple:latest + image: ghcr.io/kubeflow/training-v1/jaxjob-simple:latest command: - "python3" - "train.py" diff --git a/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml b/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml index ce8b1497ad..93a9d87a80 100644 --- a/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml +++ b/examples/jax/jax-dist-spmd-mnist/jaxjob_dist_spmd_mnist_gloo.yaml @@ -12,5 +12,5 @@ spec: spec: containers: - name: jax - image: ghcr.io/kubeflow/training/jaxjob-dist-spmd-mnist:latest + image: ghcr.io/kubeflow/training-v1/jaxjob-dist-spmd-mnist:latest imagePullPolicy: Always diff --git a/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml b/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml index fd68001e83..59315c05a8 100644 --- a/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml +++ b/examples/pytorch/deepspeed-demo/pytorch_deepspeed_demo.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-deepspeed-demo:latest + image: ghcr.io/kubeflow/training-v1/pytorch-deepspeed-demo:latest command: - torchrun - /train_bert_ds.py @@ -27,7 +27,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-deepspeed-demo:latest + image: ghcr.io/kubeflow/training-v1/pytorch-deepspeed-demo:latest command: - torchrun - /train_bert_ds.py diff --git a/examples/pytorch/elastic/echo/echo.yaml b/examples/pytorch/elastic/echo/echo.yaml index 46344f5ef8..04e4579ce8 100644 --- a/examples/pytorch/elastic/echo/echo.yaml +++ b/examples/pytorch/elastic/echo/echo.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-elastic-example-echo:latest + image: ghcr.io/kubeflow/training-v1/pytorch-elastic-example-echo:latest imagePullPolicy: IfNotPresent env: - name: LOGLEVEL diff --git a/examples/pytorch/elastic/imagenet/imagenet.yaml b/examples/pytorch/elastic/imagenet/imagenet.yaml index c2eb1de1b1..9db66ff260 100644 --- a/examples/pytorch/elastic/imagenet/imagenet.yaml +++ b/examples/pytorch/elastic/imagenet/imagenet.yaml @@ -23,7 +23,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-elastic-example-imagenet:latest + image: ghcr.io/kubeflow/training-v1/pytorch-elastic-example-imagenet:latest imagePullPolicy: IfNotPresent resources: requests: diff --git a/examples/pytorch/image-classification/create-pytorchjob.ipynb b/examples/pytorch/image-classification/create-pytorchjob.ipynb index b5d809bb3c..b58b7b62f5 100644 --- a/examples/pytorch/image-classification/create-pytorchjob.ipynb +++ b/examples/pytorch/image-classification/create-pytorchjob.ipynb @@ -121,7 +121,7 @@ "\n", "container = V1Container(\n", " name=container_name,\n", - " image=\"ghcr.io/kubeflow/training/pytorch-dist-mnist:latest\",\n", + " image=\"ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest\",\n", " args=[\"--backend\", \"gloo\"],\n", ")\n", "\n", diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml index f65f74c88a..4de020058b 100644 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml +++ b/examples/pytorch/mnist/v1/pytorch_job_mnist_gloo.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest args: ["--backend", "gloo"] # Comment out the below resources to use the CPU. resources: @@ -24,7 +24,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest args: ["--backend", "gloo"] # Comment out the below resources to use the CPU. resources: diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml index 584f7bf618..72976411e7 100644 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml +++ b/examples/pytorch/mnist/v1/pytorch_job_mnist_mpi.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest args: ["--backend", "mpi"] # Comment out the below resources to use the CPU. resources: @@ -24,7 +24,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest args: ["--backend", "mpi"] # Comment out the below resources to use the CPU. resources: diff --git a/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml b/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml index b28943c781..377c16adb1 100644 --- a/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml +++ b/examples/pytorch/mnist/v1/pytorch_job_mnist_nccl.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest args: ["--backend", "nccl"] resources: limits: @@ -23,7 +23,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-mnist:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-mnist:latest args: ["--backend", "nccl"] resources: limits: diff --git a/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml b/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml index e59c56db93..b62d419488 100644 --- a/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml +++ b/examples/pytorch/smoke-dist/pytorch_job_sendrecv.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-sendrecv-test:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-sendrecv-test:latest Worker: replicas: 3 restartPolicy: OnFailure @@ -19,4 +19,4 @@ spec: spec: containers: - name: pytorch - image: ghcr.io/kubeflow/training/pytorch-dist-sendrecv-test:latest + image: ghcr.io/kubeflow/training-v1/pytorch-dist-sendrecv-test:latest diff --git a/examples/tensorflow/dist-mnist/tf_job_mnist.yaml b/examples/tensorflow/dist-mnist/tf_job_mnist.yaml index 91f22720a6..ed38d97cf2 100644 --- a/examples/tensorflow/dist-mnist/tf_job_mnist.yaml +++ b/examples/tensorflow/dist-mnist/tf_job_mnist.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: tensorflow - image: ghcr.io/kubeflow/training/tf-dist-mnist-test:latest + image: ghcr.io/kubeflow/training-v1/tf-dist-mnist-test:latest PS: replicas: 1 @@ -20,7 +20,7 @@ spec: spec: containers: - name: tensorflow - image: ghcr.io/kubeflow/training/tf-dist-mnist-test:latest + image: ghcr.io/kubeflow/training-v1/tf-dist-mnist-test:latest Worker: replicas: 2 @@ -29,4 +29,4 @@ spec: spec: containers: - name: tensorflow - image: ghcr.io/kubeflow/training/tf-dist-mnist-test:latest + image: ghcr.io/kubeflow/training-v1/tf-dist-mnist-test:latest diff --git a/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml b/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml index 2b448ebdfb..aec6043057 100644 --- a/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml +++ b/examples/tensorflow/distribution_strategy/multi_worker_tfjob.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: tensorflow - image: ghcr.io/kubeflow/training/tf-multi-worker-strategy:latest + image: ghcr.io/kubeflow/training-v1/tf-multi-worker-strategy:latest volumeMounts: - mountPath: /train name: training diff --git a/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml b/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml index a1a5487187..2ca624711c 100644 --- a/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml +++ b/examples/tensorflow/mnist_with_summaries/tf_job_mnist.yaml @@ -14,7 +14,7 @@ spec: spec: containers: - name: tensorflow - image: ghcr.io/kubeflow/training/tf-mnist-with-summaries:latest + image: ghcr.io/kubeflow/training-v1/tf-mnist-with-summaries:latest command: - "python" - "/var/tf_mnist/mnist_with_summaries.py" diff --git a/examples/tensorflow/simple.yaml b/examples/tensorflow/simple.yaml index ce4d7c1f49..64e6895387 100644 --- a/examples/tensorflow/simple.yaml +++ b/examples/tensorflow/simple.yaml @@ -12,7 +12,7 @@ spec: spec: containers: - name: tensorflow - image: ghcr.io/kubeflow/training/tf-mnist-with-summaries:latest + image: ghcr.io/kubeflow/training-v1/tf-mnist-with-summaries:latest command: - "python" - "/var/tf_mnist/mnist_with_summaries.py" diff --git a/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml b/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml index 1c26d6c141..c14133df66 100644 --- a/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml +++ b/examples/xgboost/lightgbm-dist/xgboostjob_v1_lightgbm_dist_training.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/lightgbm-dist-py-test:latest + image: ghcr.io/kubeflow/training-v1/lightgbm-dist-py-test:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -45,7 +45,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/lightgbm-dist-py-test:latest + image: ghcr.io/kubeflow/training-v1/lightgbm-dist-py-test:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml b/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml index cf03e97937..703984b673 100644 --- a/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml +++ b/examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -23,7 +23,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml b/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml index cb08fcf36d..2393c0ae26 100644 --- a/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml +++ b/examples/xgboost/smoke-dist/xgboostjob_v1alpha1_rabit_test.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -27,7 +27,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-rabit-test:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-rabit-test:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml index 6751f3979f..e196443c09 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -28,7 +28,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml index 4eda778bef..92bf0b5cef 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_predict_local.yaml @@ -15,7 +15,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model @@ -38,7 +38,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml index e0bc8579be..3a141449da 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -30,7 +30,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml index b160acc12d..47b02cde9d 100644 --- a/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml +++ b/examples/xgboost/xgboost-dist/xgboostjob_v1_iris_train_local.yaml @@ -15,7 +15,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model @@ -41,7 +41,7 @@ spec: claimName: xgboostlocal containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest volumeMounts: - name: task-pv-storage mountPath: /tmp/xgboost_model diff --git a/examples/xgboost/xgboostjob.yaml b/examples/xgboost/xgboostjob.yaml index e0bc8579be..3a141449da 100644 --- a/examples/xgboost/xgboostjob.yaml +++ b/examples/xgboost/xgboostjob.yaml @@ -11,7 +11,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port @@ -30,7 +30,7 @@ spec: spec: containers: - name: xgboost - image: ghcr.io/kubeflow/training/xgboost-dist-iris:latest + image: ghcr.io/kubeflow/training-v1/xgboost-dist-iris:latest ports: - containerPort: 9991 name: xgboostjob-port diff --git a/manifests/overlays/kubeflow/kustomization.yaml b/manifests/overlays/kubeflow/kustomization.yaml index 9f2301c8c5..ad06e3babe 100644 --- a/manifests/overlays/kubeflow/kustomization.yaml +++ b/manifests/overlays/kubeflow/kustomization.yaml @@ -5,7 +5,7 @@ resources: - ../../base - kubeflow-training-roles.yaml images: - - name: ghcr.io/kubeflow/training/training-operator + - name: ghcr.io/kubeflow/training-v1/training-operator newTag: v1-f654b1e # TODO (tenzen-y): Once we support cert-manager, we need to remove this secret generation. # REF: https://github.com/kubeflow/training-operator/issues/2049 diff --git a/manifests/overlays/standalone/kustomization.yaml b/manifests/overlays/standalone/kustomization.yaml index 8bf594b784..f8fba59e23 100644 --- a/manifests/overlays/standalone/kustomization.yaml +++ b/manifests/overlays/standalone/kustomization.yaml @@ -5,7 +5,7 @@ resources: - ../../base - namespace.yaml images: - - name: ghcr.io/kubeflow/training/training-operator + - name: ghcr.io/kubeflow/training-v1/training-operator newTag: v1-f654b1e secretGenerator: - name: training-operator-webhook-cert diff --git a/pkg/config/config.go b/pkg/config/config.go index df3ee9c474..2a6d26514f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -32,5 +32,5 @@ const ( // PyTorchInitContainerMaxTriesDefault is the default number of tries for the pytorch init container. PyTorchInitContainerMaxTriesDefault = 100 // MPIKubectlDeliveryImageDefault is the default image for launcher pod in MPIJob init container. - MPIKubectlDeliveryImageDefault = "ghcr.io/kubeflow/training/kubectl-delivery:latest" + MPIKubectlDeliveryImageDefault = "ghcr.io/kubeflow/training-v1/kubectl-delivery:latest" ) diff --git a/sdk/python/kubeflow/training/constants/constants.py b/sdk/python/kubeflow/training/constants/constants.py index 9a9fb89f52..b69818ee7f 100644 --- a/sdk/python/kubeflow/training/constants/constants.py +++ b/sdk/python/kubeflow/training/constants/constants.py @@ -84,7 +84,7 @@ # TODO (andreyvelich): We should add image tag for Storage Initializer and Trainer. STORAGE_INITIALIZER_IMAGE = os.getenv( - "STORAGE_INITIALIZER_IMAGE", "ghcr.io/kubeflow/training/storage-initializer" + "STORAGE_INITIALIZER_IMAGE", "ghcr.io/kubeflow/training-v1/storage-initializer" ) STORAGE_INITIALIZER_VOLUME_MOUNT = models.V1VolumeMount( @@ -93,7 +93,7 @@ ) TRAINER_TRANSFORMER_IMAGE = os.getenv( - "TRAINER_TRANSFORMER_IMAGE", "ghcr.io/kubeflow/training/trainer-huggingface" + "TRAINER_TRANSFORMER_IMAGE", "ghcr.io/kubeflow/training-v1/trainer-huggingface" ) # TFJob constants. @@ -153,7 +153,7 @@ JAXJOB_PLURAL = "jaxjobs" JAXJOB_CONTAINER = "jax" JAXJOB_REPLICA_TYPES = REPLICA_TYPE_WORKER.lower() -JAXJOB_BASE_IMAGE = "ghcr.io/kubeflow/training/jaxjob-dist-spmd-mnist:latest" +JAXJOB_BASE_IMAGE = "ghcr.io/kubeflow/training-v1/jaxjob-dist-spmd-mnist:latest" # Dictionary to get plural, model, and container for each Job kind. JOB_PARAMETERS = { From b53fc3212f901150544191f79ec3c4646c32c369 Mon Sep 17 00:00:00 2001 From: sailesh duddupudi Date: Fri, 21 Mar 2025 09:07:10 +0000 Subject: [PATCH 5/5] update image in setup script Signed-off-by: sailesh duddupudi --- scripts/setup-training-operator.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setup-training-operator.sh b/scripts/setup-training-operator.sh index a1cda80e68..122ca31ad5 100755 --- a/scripts/setup-training-operator.sh +++ b/scripts/setup-training-operator.sh @@ -34,7 +34,7 @@ aws eks update-kubeconfig --region=${REGION} --name=${CLUSTER_NAME} echo "Update training operator manifest with new name $REGISTRY and tag $VERSION" cd manifests/overlays/standalone #kustomize edit set image public.ecr.aws/j1r0q0g6/training/training-operator=${REGISTRY}:${VERSION} -kustomize edit set image kubeflow/training-operator=${REGISTRY}:${VERSION} +kustomize edit set image ghcr.io/kubeflow/training-v1/training-operator=${REGISTRY}:${VERSION} echo "Installing training operator manifests" kustomize build . | kubectl apply --server-side -f -