26 changes: 13 additions & 13 deletions examples/deepspeed/text-summarization/T5-Fine-Tuning.ipynb
@@ -109,7 +109,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "35f06c45b614ecd0",
"metadata": {
"jupyter": {
@@ -118,7 +118,7 @@
},
"outputs": [],
"source": [
- "def deepspeed_train_t5(args):\n",
+ "def deepspeed_train_t5(num_samples: str, model_name: str, bucket: str):\n",
" import os\n",
" import time\n",
" import boto3\n",
@@ -187,11 +187,11 @@
" print(\"Downloading T5 Model\")\n",
" print(\"-\" * 100)\n",
"\n",
- " model = T5ForConditionalGeneration.from_pretrained(args[\"MODEL_NAME\"])\n",
- " tokenizer = T5Tokenizer.from_pretrained(args[\"MODEL_NAME\"])\n",
+ " model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
+ " tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
"\n",
" # Download dataset.\n",
- " dataset = wikihow(tokenizer, num_samples=int(args[\"NUM_SAMPLES\"]))\n",
+ " dataset = wikihow(tokenizer, num_samples=int(num_samples))\n",
" train_loader = torch.utils.data.DataLoader(\n",
" dataset, batch_size=4, sampler=DistributedSampler(dataset)\n",
" )\n",
@@ -271,15 +271,15 @@
" print(\"-\" * 100)\n",
"\n",
" print(\"Exporting HuggingFace model to S3\")\n",
- " MODEL_PATH = os.path.join(\"/home/mpiuser\", args[\"MODEL_NAME\"])\n",
+ " MODEL_PATH = os.path.join(\"/home/mpiuser\", model_name)\n",
" model.module.save_pretrained(MODEL_PATH)\n",
" tokenizer.save_pretrained(MODEL_PATH)\n",
"\n",
- " bucket = boto3.resource(\"s3\").Bucket(args[\"BUCKET\"])\n",
+ " s3_bucket = boto3.resource(\"s3\").Bucket(bucket)\n",
" for file in os.listdir(MODEL_PATH):\n",
" print(f\"Uploading file {os.path.join(MODEL_PATH, file)}\")\n",
- " bucket.upload_file(\n",
- " os.path.join(MODEL_PATH, file), os.path.join(args[\"MODEL_NAME\"], file)\n",
+ " s3_bucket.upload_file(\n",
+ " os.path.join(MODEL_PATH, file), os.path.join(model_name, file)\n",
" )"
]
},
@@ -358,15 +358,15 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "76dab189-f184-4e48-be74-f32c0dea675b",
"metadata": {},
"outputs": [],
"source": [
"args = {\n",
- " \"NUM_SAMPLES\": \"2000\",\n",
- " \"MODEL_NAME\": MODEL_NAME,\n",
- " \"BUCKET\": BUCKET_NAME,\n",
+ " \"num_samples\": \"2000\",\n",
+ " \"model_name\": MODEL_NAME,\n",
+ " \"bucket\": BUCKET_NAME,\n",
"}\n",
"\n",
"job_id = TrainerClient().train(\n",
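The pattern applied across all four notebooks is the same: each training function drops its single `args` dict in favor of explicit, typed parameters, and the `func_args` dict passed to `CustomTrainer` switches to lowercase keys matching those parameter names. A minimal sketch of the calling convention this refactor assumes (the model and bucket values here are illustrative, not from the PR):

```python
def deepspeed_train_t5(num_samples: str, model_name: str, bucket: str):
    # Values arrive as strings and are cast inside the function, e.g. int(num_samples).
    print(f"Training {model_name} on {num_samples} samples, exporting to s3://{bucket}")

# The trainer presumably unpacks func_args as keyword arguments, so the dict
# keys must match the function's parameter names exactly:
func_args = {"num_samples": "2000", "model_name": "t5-small", "bucket": "my-bucket"}
deepspeed_train_t5(**func_args)
```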
12 changes: 6 additions & 6 deletions examples/mlx/image-classification/mnist.ipynb
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "57952ce1-5752-4976-8a35-c71d25935b74",
"metadata": {
"execution": {
@@ -138,7 +138,7 @@
},
"outputs": [],
"source": [
- "def mlx_train_mnist(args):\n",
+ "def mlx_train_mnist(bucket: str, model_name: str):\n",
" import time\n",
" from functools import partial\n",
" import mlx.core as mx\n",
@@ -258,8 +258,8 @@
" # Finally, export model to S3.\n",
" if rank == 0:\n",
" print(\"Exporting MNIST model to S3\")\n",
- " model.save_weights(args[\"MODEL\"])\n",
- " boto3.client(\"s3\").upload_file(args[\"MODEL\"], args[\"BUCKET\"], args[\"MODEL\"])"
+ " model.save_weights(model_name)\n",
+ " boto3.client(\"s3\").upload_file(model_name, bucket, model_name)"
]
},
{
@@ -339,7 +339,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "76dab189-f184-4e48-be74-f32c0dea675b",
"metadata": {
"execution": {
@@ -352,7 +352,7 @@
},
"outputs": [],
"source": [
- "args = {\"BUCKET\": BUCKET, \"MODEL\": MODEL}\n",
+ "args = {\"bucket\": BUCKET, \"model_name\": MODEL}\n",
"\n",
"job_id = TrainerClient().train(\n",
" trainer=CustomTrainer(\n",
26 changes: 12 additions & 14 deletions examples/mlx/language-modeling/fine-tune-llama.ipynb
@@ -99,7 +99,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "f626231c-3f49-40d6-bd37-6d8aff83cede",
"metadata": {
"execution": {
@@ -112,7 +112,7 @@
},
"outputs": [],
"source": [
- "def fine_tune_llama(func_args):\n",
+ "def fine_tune_llama(hf_token: str, num_samples: str, batch_size: str):\n",
" import types\n",
" import os\n",
" import mlx.core as mx\n",
@@ -121,7 +121,7 @@
" from mlx_lm.utils import load\n",
" from mlx_lm.generate import generate\n",
"\n",
- " os.environ[\"HF_TOKEN\"] = func_args[\"HF_TOKEN\"]\n",
+ " os.environ[\"HF_TOKEN\"] = hf_token\n",
"\n",
" # Set parameters for the mlx-lm.\n",
" args = types.SimpleNamespace()\n",
@@ -135,8 +135,8 @@
" \"scale\": 20.0,\n",
" }\n",
"\n",
- " args.iters = int(func_args[\"NUM_SAMPLES\"])\n",
- " args.batch_size = int(func_args[\"BATCH_SIZE\"])\n",
+ " args.iters = int(num_samples)\n",
+ " args.batch_size = int(batch_size)\n",
"\n",
" # Set defaults for other required parameters\n",
" for k, v in CONFIG_DEFAULTS.items():\n",
@@ -367,7 +367,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "c87e3d06-1cbf-4e14-acad-10a7dc2292a7",
"metadata": {
"execution": {
@@ -380,17 +380,15 @@
},
"outputs": [],
"source": [
- "args = {\n",
- " \"HF_TOKEN\": HF_TOKEN,\n",
- " \"NUM_SAMPLES\": \"100\",\n",
- " # Batch size must be divisible by the number of GPUs. (8 / 4 = 2) per training node.\n",
- " \"BATCH_SIZE\": \"8\",\n",
- "}\n",
- "\n",
"job_id = TrainerClient().train(\n",
" trainer=CustomTrainer(\n",
" func=fine_tune_llama,\n",
- " func_args=args,\n",
+ " func_args={\n",
+ " \"hf_token\": HF_TOKEN,\n",
+ " \"num_samples\": \"100\",\n",
+ " # Batch size must be divisible by the number of GPUs. (8 / 4 = 2) per training node.\n",
+ " \"batch_size\": \"8\",\n",
+ " },\n",
" num_nodes=4, # Fine-Tune Llama3.2 on 4 GPUs.\n",
" ),\n",
" runtime=mlx_runtime,\n",
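The comment carried over into the new `func_args` dict encodes a real constraint: with `num_nodes=4`, a global batch size of 8 leaves 2 samples per training node. A small pre-flight check along these lines (not part of the notebook, just an illustration) would catch a bad combination before the job is submitted:

```python
num_nodes = 4   # matches num_nodes in CustomTrainer above
batch_size = 8  # matches func_args["batch_size"]

# The global batch size must divide evenly across the training nodes.
per_node, remainder = divmod(batch_size, num_nodes)
assert remainder == 0, f"batch_size={batch_size} not divisible by num_nodes={num_nodes}"
print(f"{per_node} samples per training node")  # 8 / 4 = 2
```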
20 changes: 10 additions & 10 deletions examples/pytorch/question-answering/fine-tune-distilbert.ipynb
@@ -137,12 +137,12 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "24e7f396-32ce-4d23-b76f-9684de470471",
"metadata": {},
"outputs": [],
"source": [
- "def train_distilbert(args):\n",
+ "def train_distilbert(model_name: str, bucket: str = None):\n",
" import os\n",
"\n",
" from cloudpathlib import CloudPath\n",
@@ -170,7 +170,7 @@
"\n",
" squad = squad.train_test_split(test_size=0.2, shuffle=False)\n",
" \n",
- " tokenizer = AutoTokenizer.from_pretrained(f'distilbert/{args[\"MODEL_NAME\"]}')\n",
+ " tokenizer = AutoTokenizer.from_pretrained(f'distilbert/{model_name}')\n",
" \n",
" # Define the preprocessing function\n",
" def preprocess_function(examples):\n",
@@ -231,11 +231,11 @@
" data_collator = DefaultDataCollator()\n",
"\n",
" # Load the model\n",
- " model = AutoModelForQuestionAnswering.from_pretrained(f'distilbert/{args[\"MODEL_NAME\"]}')\n",
+ " model = AutoModelForQuestionAnswering.from_pretrained(f'distilbert/{model_name}')\n",
"\n",
" # Define training hyperparameters\n",
" training_args = TrainingArguments(\n",
- " output_dir=args[\"MODEL_NAME\"],\n",
+ " output_dir=model_name,\n",
" eval_strategy=\"epoch\",\n",
" learning_rate=2e-5,\n",
" per_device_train_batch_size=1,\n",
@@ -258,8 +258,8 @@
" trainer.train()\n",
"\n",
" # Upload the fine-tuned model\n",
- " if args.get(\"BUCKET\", None):\n",
- " (CloudPath(args[\"BUCKET\"]) / args[\"MODEL_NAME\"]).upload_from(args[\"MODEL_NAME\"])"
+ " if bucket:\n",
+ " (CloudPath(bucket) / model_name).upload_from(model_name)"
]
},
{
@@ -296,7 +296,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "e3fd0c5f-f359-4c6c-9f0e-2e91904579b3",
"metadata": {},
"outputs": [],
@@ -306,8 +306,8 @@
"\n",
"MODEL_NAME = \"distilbert-base-uncased\"\n",
"args = {\n",
- " \"BUCKET\": BUCKET,\n",
- " \"MODEL_NAME\": MODEL_NAME,\n",
+ " \"model_name\": MODEL_NAME,\n",
+ " \"bucket\": BUCKET,\n",
"}\n",
"\n",
"job_id = TrainerClient().train(\n",
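One nuance in this notebook: `bucket` becomes an optional parameter (`bucket: str = None`), so the S3 upload runs only when a bucket is supplied. A strictly typed signature would use `Optional[str]`; a sketch of the two call paths this enables (hypothetical values, not from the PR):

```python
from typing import Optional

def train_distilbert(model_name: str, bucket: Optional[str] = None):
    print(f"Fine-tuning distilbert/{model_name}")
    if bucket:
        # Upload only when a destination bucket is provided.
        print(f"Uploading {model_name} to {bucket}")

train_distilbert("distilbert-base-uncased")                       # train only
train_distilbert("distilbert-base-uncased", bucket="s3://my-bkt") # train + export
```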