Commit a6d855f

Update llama2_full.rst and training_llm.rst
1 parent 1bc4b18 commit a6d855f

2 files changed: 25 additions & 42 deletions


docs/source/user_guide/jobs/tabs/llama2_full.rst

Lines changed: 16 additions & 36 deletions
@@ -14,42 +14,32 @@
         .with_compartment_id("<compartment_ocid>")
         .with_project_id("<project_ocid>")
         .with_subnet_id("<subnet_ocid>")
-        .with_shape_name("VM.GPU.A10.1")
+        .with_shape_name("VM.GPU.A10.2")
         .with_block_storage_size(256)
     )
     .with_runtime(
         PyTorchDistributedRuntime()
         # Specify the service conda environment by slug name.
-        .with_service_conda("pytorch20_p39_gpu_v1")
+        .with_service_conda("pytorch20_p39_gpu_v2")
         .with_git(
             url="https://github.com/facebookresearch/llama-recipes.git",
-            commit="03faba661f079ee1ecaeb66deaa6bdec920a7bab"
+            commit="1aecd00924738239f8d86f342b36bacad180d2b3"
         )
         .with_dependency(
             pip_pkg=" ".join([
-                "'accelerate>=0.21.0'",
-                "appdirs",
-                "loralib",
-                "bitsandbytes==0.39.1",
-                "black",
-                "'black[jupyter]'",
-                "datasets",
-                "fire",
-                "'git+https://github.com/huggingface/peft.git'",
-                "'transformers>=4.31.0'",
-                "sentencepiece",
-                "py7zr",
-                "scipy",
-                "optimum"
+                "--extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0",
+                "git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4",
+                "appdirs==1.4.4",
+                "llama-recipes==0.0.1",
+                "py7zr==0.20.6",
             ])
         )
         .with_output("/home/datascience/outputs", "oci://bucket@namespace/outputs/$JOB_RUN_OCID")
         .with_command(" ".join([
-            "torchrun llama_finetuning.py",
+            "torchrun examples/finetuning.py",
             "--enable_fsdp",
             "--pure_bf16",
             "--batch_size_training 1",
-            "--micro_batch_size 1",
             "--model_name $MODEL_NAME",
             "--dist_checkpoint_root_folder /home/datascience/outputs",
             "--dist_checkpoint_folder fine-tuned"
@@ -87,36 +77,26 @@
 spec:
   git:
     url: https://github.com/facebookresearch/llama-recipes.git
-    commit: 03faba661f079ee1ecaeb66deaa6bdec920a7bab
+    commit: 1aecd00924738239f8d86f342b36bacad180d2b3
   command: >-
     torchrun llama_finetuning.py
     --enable_fsdp
     --pure_bf16
     --batch_size_training 1
-    --micro_batch_size 1
     --model_name $MODEL_NAME
     --dist_checkpoint_root_folder /home/datascience/outputs
     --dist_checkpoint_folder fine-tuned
   replicas: 2
   conda:
     type: service
-    slug: pytorch20_p39_gpu_v1
+    slug: pytorch20_p39_gpu_v2
   dependencies:
     pipPackages: >-
-      'accelerate>=0.21.0'
-      appdirs
-      loralib
-      bitsandbytes==0.39.1
-      black
-      'black[jupyter]'
-      datasets
-      fire
-      'git+https://github.com/huggingface/peft.git'
-      'transformers>=4.31.0'
-      sentencepiece
-      py7zr
-      scipy
-      optimum
+      --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0
+      git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4
+      llama-recipes==0.0.1
+      appdirs==1.4.4
+      py7zr==0.20.6
   outputDir: /home/datascience/outputs
   outputUri: oci://bucket@namespace/outputs/$JOB_RUN_OCID
   env:
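
For context, the diff above shows only the changed region of the job definition in llama2_full.rst. Below is a minimal sketch of how the full Python variant might be assembled and launched with ADS; the job name, the MODEL_NAME value, and the OCID/bucket placeholders are illustrative assumptions rather than part of this commit, while the builder calls mirror the updated lines.

# A minimal, hypothetical end-to-end version of the Python job definition this
# commit edits. Placeholders in angle brackets and the MODEL_NAME value are
# assumptions, not part of the commit.
from ads.jobs import DataScienceJob, Job, PyTorchDistributedRuntime

job = (
    Job(name="llama2-full-finetune")
    .with_infrastructure(
        DataScienceJob()
        .with_compartment_id("<compartment_ocid>")
        .with_project_id("<project_ocid>")
        .with_subnet_id("<subnet_ocid>")
        .with_shape_name("VM.GPU.A10.2")
        .with_block_storage_size(256)
    )
    .with_runtime(
        PyTorchDistributedRuntime()
        .with_service_conda("pytorch20_p39_gpu_v2")
        .with_git(
            url="https://github.com/facebookresearch/llama-recipes.git",
            commit="1aecd00924738239f8d86f342b36bacad180d2b3",
        )
        .with_dependency(
            pip_pkg=" ".join([
                "--extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0",
                "git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4",
                "appdirs==1.4.4",
                "llama-recipes==0.0.1",
                "py7zr==0.20.6",
            ])
        )
        .with_environment_variable(MODEL_NAME="meta-llama/Llama-2-7b-hf")
        .with_replica(2)
        .with_output("/home/datascience/outputs",
                     "oci://bucket@namespace/outputs/$JOB_RUN_OCID")
        .with_command(" ".join([
            "torchrun examples/finetuning.py",
            "--enable_fsdp",
            "--pure_bf16",
            "--batch_size_training 1",
            "--model_name $MODEL_NAME",
            "--dist_checkpoint_root_folder /home/datascience/outputs",
            "--dist_checkpoint_folder fine-tuned",
        ]))
    )
)

job.create()     # register the job definition with OCI Data Science
run = job.run()  # launch the 2-node distributed job run
run.watch()      # stream the job run logs

The YAML variant of the same runtime spec can be submitted from the CLI instead, for example with "ads opctl run -f <spec>.yaml" (the spec file name here is assumed).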

docs/source/user_guide/model_training/training_llm.rst

Lines changed: 9 additions & 6 deletions
@@ -16,9 +16,9 @@ This page shows an example of fine-tuning the `Llama 2 <https://ai.meta.com/llam
 In this example, internet access is needed to download the source code and the pre-trained model.
 
 The `llama-recipes <llama-recipes>`_ repository contains example code to fine-tune the llama2 model.
-The example `fine-tuning script <https://github.com/facebookresearch/llama-recipes/blob/main/llama_finetuning.py>`_ support full parameter fine-tuning
+The example `fine-tuning script <https://github.com/facebookresearch/llama-recipes/blob/1aecd00924738239f8d86f342b36bacad180d2b3/examples/finetuning.py>`_ supports both full parameter fine-tuning
 and `Parameter-Efficient Fine-Tuning (PEFT) <https://huggingface.co/blog/peft>`_.
-With ADS, you can start the training job by taking the source code directly from Github.
+With ADS, you can start the training job by taking the source code directly from GitHub with no code change.
 
 Access the Pre-Trained Model
 ============================
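
The "Access the Pre-Trained Model" section that follows this hunk is unchanged by the commit, so its body is not shown in the diff. For readers of this page, one common way to fetch the gated Llama 2 weights is sketched below; the target directory, token handling, and the use of snapshot_download are assumptions, not something this commit prescribes.

# Hypothetical download of the pre-trained weights the docs refer to; requires
# an accepted Llama 2 license on Hugging Face and a valid access token.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="meta-llama/Llama-2-7b-hf",   # gated repository
    local_dir="/home/datascience/llama",  # assumed local path for $MODEL_NAME
    token="<huggingface_token>",
)

As the updated --model_name value in the next hunk suggests, the training command can also point directly at the Hugging Face model id instead of a local copy.
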
@@ -49,13 +49,16 @@ The job run will:
 
 Note that in the training command, there is no need to specify the number of nodes or the number of GPUs. ADS will automatically configure them based on the ``replica`` and ``shape`` you specified.
 
-The fine-tuning runs on the `samsum <https://huggingface.co/datasets/samsum>`_ dataset by default. You can also `add your custom datasets <https://github.com/facebookresearch/llama-recipes/blob/main/docs/Dataset.md#adding-custom-datasets>`_.
+The fine-tuning runs on the `samsum <https://huggingface.co/datasets/samsum>`_ dataset by default. You can also `add your custom datasets <https://github.com/facebookresearch/llama-recipes/blob/1aecd00924738239f8d86f342b36bacad180d2b3/docs/Dataset.md>`_.
 
-The same training script also support Parameter-Efficient Fine-Tuning (PEFT). You can change the ``command`` to the following for PEFT with `LoRA <https://huggingface.co/docs/peft/conceptual_guides/lora>`_
+Once the fine-tuning is finished, the checkpoints will be saved into the OCI object storage bucket as specified.
+You can `load the FSDP checkpoints for inferencing <https://github.com/facebookresearch/llama-recipes/blob/main/docs/inference.md#loading-back-fsdp-checkpoints>`_.
+
+The same training script also supports Parameter-Efficient Fine-Tuning (PEFT). You can change the ``command`` to the following for PEFT with `LoRA <https://huggingface.co/docs/peft/conceptual_guides/lora>`_. Note that for PEFT, the fine-tuned weights are stored in the location specified by ``--output_dir``, while for full parameter fine-tuning, the checkpoints are stored in the location specified by ``--dist_checkpoint_root_folder`` and ``--dist_checkpoint_folder``.
 
 .. code-block:: bash
 
     torchrun llama_finetuning.py --enable_fsdp --use_peft --peft_method lora \
-    --pure_bf16 --batch_size_training 1 --micro_batch_size 1 \
-    --model_name /home/datascience/llama --output_dir /home/datascience/outputs
+    --pure_bf16 --batch_size_training 1 \
+    --model_name meta-llama/Llama-2-7b-hf --output_dir /home/datascience/outputs
 
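To illustrate the distinction the added text draws between the two output locations: after a PEFT run, the directory passed as --output_dir holds a LoRA adapter that can be re-attached to the base model. The following is a minimal sketch; the adapter path, base model id, and prompt are assumptions, not part of this commit.

# Hypothetical loading of the LoRA adapter written to --output_dir by the PEFT
# command above; full-parameter FSDP checkpoints instead follow the linked
# "loading back FSDP checkpoints" instructions.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
model = PeftModel.from_pretrained(base, "/home/datascience/outputs")  # --output_dir
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

inputs = tokenizer("Summarize:\nA: Lunch at noon?\nB: Sure, see you then.", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))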