26 changes: 13 additions & 13 deletions examples/deepspeed/text-summarization/T5-Fine-Tuning.ipynb
@@ -109,7 +109,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "35f06c45b614ecd0",
"metadata": {
"jupyter": {
@@ -118,7 +118,7 @@
},
"outputs": [],
"source": [
- "def deepspeed_train_t5(args):\n",
+ "def deepspeed_train_t5(num_samples: str, model_name: str, bucket: str):\n",
" import os\n",
" import time\n",
" import boto3\n",
@@ -187,11 +187,11 @@
" print(\"Downloading T5 Model\")\n",
" print(\"-\" * 100)\n",
"\n",
- " model = T5ForConditionalGeneration.from_pretrained(args[\"MODEL_NAME\"])\n",
- " tokenizer = T5Tokenizer.from_pretrained(args[\"MODEL_NAME\"])\n",
+ " model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
+ " tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
"\n",
" # Download dataset.\n",
- " dataset = wikihow(tokenizer, num_samples=int(args[\"NUM_SAMPLES\"]))\n",
+ " dataset = wikihow(tokenizer, num_samples=int(num_samples))\n",
" train_loader = torch.utils.data.DataLoader(\n",
" dataset, batch_size=4, sampler=DistributedSampler(dataset)\n",
" )\n",
@@ -271,15 +271,15 @@
" print(\"-\" * 100)\n",
"\n",
" print(\"Exporting HuggingFace model to S3\")\n",
- " MODEL_PATH = os.path.join(\"/home/mpiuser\", args[\"MODEL_NAME\"])\n",
+ " MODEL_PATH = os.path.join(\"/home/mpiuser\", model_name)\n",
" model.module.save_pretrained(MODEL_PATH)\n",
" tokenizer.save_pretrained(MODEL_PATH)\n",
"\n",
- " bucket = boto3.resource(\"s3\").Bucket(args[\"BUCKET\"])\n",
+ " s3_bucket = boto3.resource(\"s3\").Bucket(bucket)\n",
" for file in os.listdir(MODEL_PATH):\n",
" print(f\"Uploading file {os.path.join(MODEL_PATH, file)}\")\n",
- " bucket.upload_file(\n",
- " os.path.join(MODEL_PATH, file), os.path.join(args[\"MODEL_NAME\"], file)\n",
+ " s3_bucket.upload_file(\n",
+ " os.path.join(MODEL_PATH, file), os.path.join(model_name, file)\n",
" )"
]
},
@@ -358,15 +358,15 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "76dab189-f184-4e48-be74-f32c0dea675b",
"metadata": {},
"outputs": [],
"source": [
"args = {\n",
- " \"NUM_SAMPLES\": \"2000\",\n",
- " \"MODEL_NAME\": MODEL_NAME,\n",
- " \"BUCKET\": BUCKET_NAME,\n",
+ " \"num_samples\": \"2000\",\n",
+ " \"model_name\": MODEL_NAME,\n",
+ " \"bucket\": BUCKET_NAME,\n",
"}\n",
"\n",
"job_id = TrainerClient().train(\n",
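The pattern applied across all four notebooks is the same: each training function drops its single `args` dict in favor of explicit, typed parameters, and the `func_args` dict passed to `CustomTrainer` switches to lowercase keys matching those parameter names. A minimal sketch of the calling convention this refactor assumes (the model and bucket values here are illustrative, not from the PR):

```python
def deepspeed_train_t5(num_samples: str, model_name: str, bucket: str):
    # Values arrive as strings and are cast inside the function, e.g. int(num_samples).
    print(f"Training {model_name} on {num_samples} samples, exporting to s3://{bucket}")

# The trainer presumably unpacks func_args as keyword arguments, so the dict
# keys must match the function's parameter names exactly:
func_args = {"num_samples": "2000", "model_name": "t5-small", "bucket": "my-bucket"}
deepspeed_train_t5(**func_args)
```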
12 changes: 6 additions & 6 deletions examples/mlx/image-classification/mnist.ipynb
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "57952ce1-5752-4976-8a35-c71d25935b74",
"metadata": {
"execution": {
@@ -138,7 +138,7 @@
},
"outputs": [],
"source": [
- "def mlx_train_mnist(args):\n",
+ "def mlx_train_mnist(bucket: str, model_name: str):\n",
" import time\n",
" from functools import partial\n",
" import mlx.core as mx\n",
@@ -258,8 +258,8 @@
" # Finally, export model to S3.\n",
" if rank == 0:\n",
" print(\"Exporting MNIST model to S3\")\n",
- " model.save_weights(args[\"MODEL\"])\n",
- " boto3.client(\"s3\").upload_file(args[\"MODEL\"], args[\"BUCKET\"], args[\"MODEL\"])"
+ " model.save_weights(model_name)\n",
+ " boto3.client(\"s3\").upload_file(model_name, bucket, model_name)"
]
},
{
@@ -339,7 +339,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "76dab189-f184-4e48-be74-f32c0dea675b",
"metadata": {
"execution": {
@@ -352,7 +352,7 @@
},
"outputs": [],
"source": [
- "args = {\"BUCKET\": BUCKET, \"MODEL\": MODEL}\n",
+ "args = {\"bucket\": BUCKET, \"model_name\": MODEL}\n",
"\n",
"job_id = TrainerClient().train(\n",
" trainer=CustomTrainer(\n",
26 changes: 12 additions & 14 deletions examples/mlx/language-modeling/fine-tune-llama.ipynb
@@ -99,7 +99,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "f626231c-3f49-40d6-bd37-6d8aff83cede",
"metadata": {
"execution": {
@@ -112,7 +112,7 @@
},
"outputs": [],
"source": [
- "def fine_tune_llama(func_args):\n",
+ "def fine_tune_llama(hf_token: str, num_samples: str, batch_size: str):\n",
" import types\n",
" import os\n",
" import mlx.core as mx\n",
@@ -121,7 +121,7 @@
" from mlx_lm.utils import load\n",
" from mlx_lm.generate import generate\n",
"\n",
- " os.environ[\"HF_TOKEN\"] = func_args[\"HF_TOKEN\"]\n",
+ " os.environ[\"HF_TOKEN\"] = hf_token\n",
"\n",
" # Set parameters for the mlx-lm.\n",
" args = types.SimpleNamespace()\n",
@@ -135,8 +135,8 @@
" \"scale\": 20.0,\n",
" }\n",
"\n",
- " args.iters = int(func_args[\"NUM_SAMPLES\"])\n",
- " args.batch_size = int(func_args[\"BATCH_SIZE\"])\n",
+ " args.iters = int(num_samples)\n",
+ " args.batch_size = int(batch_size)\n",
"\n",
" # Set defaults for other required parameters\n",
" for k, v in CONFIG_DEFAULTS.items():\n",
@@ -367,7 +367,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "c87e3d06-1cbf-4e14-acad-10a7dc2292a7",
"metadata": {
"execution": {
@@ -380,17 +380,15 @@
},
"outputs": [],
"source": [
- "args = {\n",
- " \"HF_TOKEN\": HF_TOKEN,\n",
- " \"NUM_SAMPLES\": \"100\",\n",
- " # Batch size must be divisible by the number of GPUs. (8 / 4 = 2) per training node.\n",
- " \"BATCH_SIZE\": \"8\",\n",
- "}\n",
- "\n",
"job_id = TrainerClient().train(\n",
" trainer=CustomTrainer(\n",
" func=fine_tune_llama,\n",
- " func_args=args,\n",
+ " func_args={\n",
+ " \"hf_token\": HF_TOKEN,\n",
+ " \"num_samples\": \"100\",\n",
+ " # Batch size must be divisible by the number of GPUs. (8 / 4 = 2) per training node.\n",
+ " \"batch_size\": \"8\",\n",
+ " },\n",
" num_nodes=4, # Fine-Tune Llama3.2 on 4 GPUs.\n",
" ),\n",
" runtime=mlx_runtime,\n",
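The comment carried over into the new `func_args` dict encodes a real constraint: with `num_nodes=4`, a global batch size of 8 leaves 2 samples per training node. A small pre-flight check along these lines (not part of the notebook, just an illustration) would catch a bad combination before the job is submitted:

```python
num_nodes = 4   # matches num_nodes in CustomTrainer above
batch_size = 8  # matches func_args["batch_size"]

# The global batch size must divide evenly across the training nodes.
per_node, remainder = divmod(batch_size, num_nodes)
assert remainder == 0, f"batch_size={batch_size} not divisible by num_nodes={num_nodes}"
print(f"{per_node} samples per training node")  # 8 / 4 = 2
```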
20 changes: 10 additions & 10 deletions examples/pytorch/question-answering/fine-tune-distilbert.ipynb
@@ -137,12 +137,12 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "24e7f396-32ce-4d23-b76f-9684de470471",
"metadata": {},
"outputs": [],
"source": [
- "def train_distilbert(args):\n",
+ "def train_distilbert(model_name: str, bucket: str = None):\n",
" import os\n",
"\n",
" from cloudpathlib import CloudPath\n",
@@ -170,7 +170,7 @@
"\n",
" squad = squad.train_test_split(test_size=0.2, shuffle=False)\n",
" \n",
- " tokenizer = AutoTokenizer.from_pretrained(f'distilbert/{args[\"MODEL_NAME\"]}')\n",
+ " tokenizer = AutoTokenizer.from_pretrained(f'distilbert/{model_name}')\n",
" \n",
" # Define the preprocessing function\n",
" def preprocess_function(examples):\n",
@@ -231,11 +231,11 @@
" data_collator = DefaultDataCollator()\n",
"\n",
" # Load the model\n",
- " model = AutoModelForQuestionAnswering.from_pretrained(f'distilbert/{args[\"MODEL_NAME\"]}')\n",
+ " model = AutoModelForQuestionAnswering.from_pretrained(f'distilbert/{model_name}')\n",
"\n",
" # Define training hyperparameters\n",
" training_args = TrainingArguments(\n",
- " output_dir=args[\"MODEL_NAME\"],\n",
+ " output_dir=model_name,\n",
" eval_strategy=\"epoch\",\n",
" learning_rate=2e-5,\n",
" per_device_train_batch_size=1,\n",
@@ -258,8 +258,8 @@
" trainer.train()\n",
"\n",
" # Upload the fine-tuned model\n",
- " if args.get(\"BUCKET\", None):\n",
- " (CloudPath(args[\"BUCKET\"]) / args[\"MODEL_NAME\"]).upload_from(args[\"MODEL_NAME\"])"
+ " if bucket:\n",
+ " (CloudPath(bucket) / model_name).upload_from(model_name)"
]
},
{
@@ -296,7 +296,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "e3fd0c5f-f359-4c6c-9f0e-2e91904579b3",
"metadata": {},
"outputs": [],
@@ -306,8 +306,8 @@
"\n",
"MODEL_NAME = \"distilbert-base-uncased\"\n",
"args = {\n",
- " \"BUCKET\": BUCKET,\n",
- " \"MODEL_NAME\": MODEL_NAME,\n",
+ " \"model_name\": MODEL_NAME,\n",
+ " \"bucket\": BUCKET,\n",
"}\n",
"\n",
"job_id = TrainerClient().train(\n",
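One nuance in this notebook: `bucket` becomes an optional parameter (`bucket: str = None`), so the S3 upload runs only when a bucket is supplied. A strictly typed signature would use `Optional[str]`; a sketch of the two call paths this enables (hypothetical values, not from the PR):

```python
from typing import Optional

def train_distilbert(model_name: str, bucket: Optional[str] = None):
    print(f"Fine-tuning distilbert/{model_name}")
    if bucket:
        # Upload only when a destination bucket is provided.
        print(f"Uploading {model_name} to {bucket}")

train_distilbert("distilbert-base-uncased")                       # train only
train_distilbert("distilbert-base-uncased", bucket="s3://my-bkt") # train + export
```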