|
14 | 14 | .with_compartment_id("<compartment_ocid>")
|
15 | 15 | .with_project_id("<project_ocid>")
|
16 | 16 | .with_subnet_id("<subnet_ocid>")
|
17 |
| - .with_shape_name("VM.GPU.A10.1") |
| 17 | + .with_shape_name("VM.GPU.A10.2") |
18 | 18 | .with_block_storage_size(256)
|
19 | 19 | )
|
20 | 20 | .with_runtime(
|
21 | 21 | PyTorchDistributedRuntime()
|
22 | 22 | # Specify the service conda environment by slug name.
|
23 |
| - .with_service_conda("pytorch20_p39_gpu_v1") |
| 23 | + .with_service_conda("pytorch20_p39_gpu_v2") |
24 | 24 | .with_git(
|
25 | 25 | url="https://github.com/facebookresearch/llama-recipes.git",
|
26 |
| - commit="03faba661f079ee1ecaeb66deaa6bdec920a7bab" |
| 26 | + commit="1aecd00924738239f8d86f342b36bacad180d2b3" |
27 | 27 | )
|
28 | 28 | .with_dependency(
|
29 | 29 | pip_pkg=" ".join([
|
30 |
| - "'accelerate>=0.21.0'", |
31 |
| - "appdirs", |
32 |
| - "loralib", |
33 |
| - "bitsandbytes==0.39.1", |
34 |
| - "black", |
35 |
| - "'black[jupyter]'", |
36 |
| - "datasets", |
37 |
| - "fire", |
38 |
| - "'git+https://github.com/huggingface/peft.git'", |
39 |
| - "'transformers>=4.31.0'", |
40 |
| - "sentencepiece", |
41 |
| - "py7zr", |
42 |
| - "scipy", |
43 |
| - "optimum" |
| 30 | + "--extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0", |
| 31 | + "git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4", |
| 32 | + "appdirs==1.4.4", |
| 33 | + "llama-recipes==0.0.1", |
| 34 | + "py7zr==0.20.6", |
44 | 35 | ])
|
45 | 36 | )
|
46 | 37 | .with_output("/home/datascience/outputs", "oci://bucket@namespace/outputs/$JOB_RUN_OCID")
|
47 | 38 | .with_command(" ".join([
|
48 |
| - "torchrun llama_finetuning.py", |
| 39 | + "torchrun examples/finetuning.py", |
49 | 40 | "--enable_fsdp",
|
50 | 41 | "--pure_bf16",
|
51 | 42 | "--batch_size_training 1",
|
52 |
| - "--micro_batch_size 1", |
53 | 43 | "--model_name $MODEL_NAME",
|
54 | 44 | "--dist_checkpoint_root_folder /home/datascience/outputs",
|
55 | 45 | "--dist_checkpoint_folder fine-tuned"
|
|
87 | 77 | spec:
|
88 | 78 | git:
|
89 | 79 | url: https://github.com/facebookresearch/llama-recipes.git
|
90 |
| - commit: 03faba661f079ee1ecaeb66deaa6bdec920a7bab |
| 80 | + commit: 1aecd00924738239f8d86f342b36bacad180d2b3 |
91 | 81 | command: >-
|
92 | 82 | torchrun examples/finetuning.py
|
93 | 83 | --enable_fsdp
|
94 | 84 | --pure_bf16
|
95 | 85 | --batch_size_training 1
|
96 |
| - --micro_batch_size 1 |
97 | 86 | --model_name $MODEL_NAME
|
98 | 87 | --dist_checkpoint_root_folder /home/datascience/outputs
|
99 | 88 | --dist_checkpoint_folder fine-tuned
|
100 | 89 | replicas: 2
|
101 | 90 | conda:
|
102 | 91 | type: service
|
103 |
| - slug: pytorch20_p39_gpu_v1 |
| 92 | + slug: pytorch20_p39_gpu_v2 |
104 | 93 | dependencies:
|
105 | 94 | pipPackages: >-
|
106 |
| - 'accelerate>=0.21.0' |
107 |
| - appdirs |
108 |
| - loralib |
109 |
| - bitsandbytes==0.39.1 |
110 |
| - black |
111 |
| - 'black[jupyter]' |
112 |
| - datasets |
113 |
| - fire |
114 |
| - 'git+https://github.com/huggingface/peft.git' |
115 |
| - 'transformers>=4.31.0' |
116 |
| - sentencepiece |
117 |
| - py7zr |
118 |
| - scipy |
119 |
| - optimum |
| 95 | + --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0 |
| 96 | + git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4 |
| 97 | + llama-recipes==0.0.1 |
| 98 | + appdirs==1.4.4 |
| 99 | + py7zr==0.20.6 |
120 | 100 | outputDir: /home/datascience/outputs
|
121 | 101 | outputUri: oci://bucket@namespace/outputs/$JOB_RUN_OCID
|
122 | 102 | env:
|
|
0 commit comments