
Commit e100715

Support of Splash Attention using xla_builder.call_jax (#145)
* Support of Splash Attention using xla_builder.call_jax
* nit
* nit update
* Support align sharding spec with JAX
* Update model config and enable e2e test for sa
* name nit
* fix test arg
* enable e2e test for sa
* Support run splash attention without repeat KV heads
* fix e2e test
* fix mixtral test
* update splash attention kernel block size config to improve performance
* nit test fix
* lru_cache for func arg to call_jax to prevent jit recompilation
* debug with profile
* caching the call_jax to reduce tracing overhead
* clean up code regarding shard spec
* fix e2e test naming
* nit
* fix e2e test naming again
* fix name for xpk
1 parent 64a6e1a commit e100715
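
As a rough illustration of the mechanism the commit message describes, the sketch below shows how a JAX splash-attention kernel could be invoked from torch_xla through xla_builder.call_jax, with functools.lru_cache keeping the kernel construction (and hence the JIT trace) from being repeated on every step. The helper names, kernel arguments, and shapes are assumptions for illustration, not the code added in this commit; the sharding-spec alignment with JAX mentioned in the message is omitted here.

```python
# Illustrative sketch only -- not the torchprime implementation from this commit.
# Assumes the Pallas splash-attention API under jax.experimental and that
# xla_builder.call_jax accepts a JAX callable plus a tuple of XLA tensors.
import functools
import math

import jax
import torch
import torch_xla.core.xla_builder as xb
from jax.experimental.pallas.ops.tpu.splash_attention import (
    splash_attention_kernel,
    splash_attention_mask,
)


@functools.lru_cache(maxsize=None)
def _build_splash_kernel(num_heads: int, seq_len: int):
    # Cached by (num_heads, seq_len) so the Pallas kernel is built once per
    # static shape instead of on every training step.
    mask = splash_attention_mask.MultiHeadMask(
        [splash_attention_mask.CausalMask((seq_len, seq_len)) for _ in range(num_heads)]
    )
    return splash_attention_kernel.make_splash_mha(
        mask=mask, head_shards=1, q_seq_shards=1
    )


def splash_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    """q, k, v: [batch, num_heads, seq_len, head_dim] tensors on an XLA device."""
    kernel = _build_splash_kernel(q.shape[1], q.shape[2])

    def jax_fn(q, k, v):
        # The splash kernel works per example and expects pre-scaled queries,
        # so scale here and vmap over the batch dimension.
        q = q / math.sqrt(q.shape[-1])
        return jax.vmap(kernel)(q, k, v)

    # call_jax stages the JAX computation into the surrounding XLA graph.
    return xb.call_jax(jax_fn, (q, k, v))
```

Per the commit message, the lru_cache on the kernel builder and the caching of the call_jax wrapper are what prevent repeated jit recompilation and reduce tracing overhead; the snippet above only shows the first of the two.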

11 files changed, +869 −46 lines changed


.github/workflows/e2e_test.yml

Lines changed: 29 additions & 1 deletion
@@ -16,6 +16,7 @@ jobs:
       ARTIFACT_DIR: gs://torchprime-e2e-tests/${{ github.job }}/${{ github.run_id }}-${{ github.run_attempt }}
     outputs:
       llama-3-8b-name: ${{ steps.run-llama-3-8b.outputs.name }}
+      llama-3_1-8b-sa-name: ${{ steps.run-llama-3_1-8b-SplashAttention.outputs.name }}
       llama-3-8b-2d-name: ${{ steps.run-llama-3-8b-2d.outputs.name }}
       llama-3-8b-2-slice-name: ${{ steps.run-llama-3-8b-2-slice.outputs.name }}
       mixtral-8x7b-name: ${{ steps.run-mixtral-8x7b.outputs.name }}
@@ -61,7 +62,25 @@ jobs:
             dataset_config_name=wikitext-2-raw-v1 \
             profile_step=3 \
             max_steps=15
-
+      - name: Run Llama 3.1 8B (Splash Attention)
+        id: run-llama-3_1-8b-SplashAttention
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          XLA_IR_DEBUG: 1
+          XLA_HLO_DEBUG: 1
+        run: |
+          name=$(e2e_testing/gen_name.py llama-3dot1-8b-sa)
+          echo "name=$name" >> "$GITHUB_OUTPUT"
+          tp run \
+            --name $name \
+            torchprime/torch_xla_models/train.py \
+            model=llama-3.1-8b \
+            model.attention_kernel=splash_attention \
+            global_batch_size=8 \
+            ici_mesh.fsdp=4 \
+            dataset_config_name=wikitext-2-raw-v1 \
+            profile_step=3 \
+            max_steps=15
       - name: Run Llama 3.0 8B (2D sharding)
         id: run-llama-3-8b-2d
         env:
@@ -134,6 +153,15 @@ jobs:
       artifact_dir: ${{ needs.tp-run.outputs.artifact-dir }}
     secrets: inherit

+  llama-3_1-8b-sa:
+    name: Llama 3.1 8B (Splash Attention)
+    needs: tp-run
+    uses: ./.github/workflows/reusable_e2e_check.yml
+    with:
+      jobset_name: ${{ needs.tp-run.outputs.llama-3_1-8b-sa-name }}
+      artifact_dir: ${{ needs.tp-run.outputs.artifact-dir }}
+    secrets: inherit
+
   llama-3-8b-2d:
     name: Llama 3.0 8B (2D sharding)
     needs: tp-run
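
The new step exercises the splash-attention path entirely through configuration overrides passed to tp run. Assuming a Hydra/OmegaConf-style config stack (not shown in this diff), the dotted overrides compose over the YAML defaults roughly as sketched here; the default values in the snippet are placeholders.

```python
# Illustration only: how dotted CLI overrides might compose over YAML defaults.
# Assumes an OmegaConf-based stack; torchprime's actual config loading may differ.
from omegaconf import OmegaConf

defaults = OmegaConf.create({
    "model": {"attention_kernel": "flash_attention"},
    "ici_mesh": {"fsdp": 1},
    "global_batch_size": 256,
})
overrides = OmegaConf.from_dotlist([
    "model.attention_kernel=splash_attention",
    "ici_mesh.fsdp=4",
    "global_batch_size=8",
])
cfg = OmegaConf.merge(defaults, overrides)
assert cfg.model.attention_kernel == "splash_attention"
assert cfg.ici_mesh.fsdp == 4
```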

torchprime/torch_xla_models/configs/model/llama-3-8b.yaml

Lines changed: 2 additions & 1 deletion
@@ -18,5 +18,6 @@ initializer_range: 0.02
 rms_norm_eps: 1.0e-05
 attention_dropout: false
 attention_bias: false
-flash_attention: true
+# choose attention_kernel from: [flash_attention, splash_attention, null]
+attention_kernel: flash_attention
 rope_theta: 500000.0
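
The boolean flash_attention flag becomes a three-way attention_kernel choice. A minimal sketch of how an attention layer might dispatch on it follows; splash_attention refers to the call_jax sketch near the top of this page, the flash path stands in for torch_xla's existing Pallas flash-attention wrapper, and all names are assumptions rather than torchprime's actual module structure.

```python
# Hypothetical dispatch on the new attention_kernel config value.
import math

import torch


def eager_attention(q, k, v):
    # Reference path used when attention_kernel is null.
    scores = q @ k.transpose(-2, -1) / math.sqrt(q.shape[-1])
    return torch.softmax(scores, dim=-1) @ v


def attention(q, k, v, attention_kernel=None):
    if attention_kernel == "splash_attention":
        # Pallas splash-attention kernel routed through xla_builder.call_jax
        # (see the earlier sketch).
        return splash_attention(q, k, v)
    if attention_kernel == "flash_attention":
        # Existing flash-attention path, e.g. torch_xla's Pallas wrapper.
        from torch_xla.experimental.custom_kernel import flash_attention
        return flash_attention(q, k, v, causal=True)
    return eager_attention(q, k, v)
```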

torchprime/torch_xla_models/configs/model/llama-3.1-405b.yaml

Lines changed: 2 additions & 1 deletion
@@ -18,7 +18,8 @@ initializer_range: 0.02
 rms_norm_eps: 1.0e-05
 attention_dropout: false
 attention_bias: false
-flash_attention: true
+# choose attention_kernel from: [flash_attention, splash_attention, null]
+attention_kernel: flash_attention
 rope_theta: 500000.0
 rope_scaling:
   factor: 8.0

torchprime/torch_xla_models/configs/model/llama-3.1-8b.yaml

Lines changed: 2 additions & 1 deletion
@@ -18,7 +18,8 @@ initializer_range: 0.02
 rms_norm_eps: 1.0e-05
 attention_dropout: false
 attention_bias: false
-flash_attention: true
+# choose attention_kernel from: [flash_attention, splash_attention, null]
+attention_kernel: flash_attention
 rope_theta: 500000.0
 rope_scaling:
   factor: 8.0

torchprime/torch_xla_models/configs/model/mixtral-8x7b.yaml

Lines changed: 2 additions & 1 deletion
@@ -20,6 +20,7 @@ router_aux_loss_coef: 0.02
 vocab_size: 32000
 attention_bias: false
 attention_dropout: 0.0
-flash_attention: true
+# choose attention_kernel from: [flash_attention, splash_attention, null]
+attention_kernel: flash_attention
 moe_implementation: gmm
 tokenizer_name: mistralai/Mixtral-8x7B-v0.1
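
Beyond these YAML defaults, the commit message notes tuning the splash-attention kernel's block-size configuration for performance and supporting runs without repeating KV heads. As a hedged sketch of what the block-size knobs look like in the Pallas splash-attention API (the sizes below are placeholders, not the values this commit settles on):

```python
# Placeholder block-size configuration for the splash-attention kernel; the
# actual tuned values from this commit are not reproduced here.
from jax.experimental.pallas.ops.tpu.splash_attention import (
    splash_attention_kernel,
    splash_attention_mask,
)

seq_len, num_heads = 8192, 32
block_sizes = splash_attention_kernel.BlockSizes(
    block_q=512,
    block_kv=512,
    block_kv_compute=512,
    block_q_dkv=512,
    block_kv_dkv=512,
    block_kv_dkv_compute=512,
    block_q_dq=512,
    block_kv_dq=512,
)
mask = splash_attention_mask.MultiHeadMask(
    [splash_attention_mask.CausalMask((seq_len, seq_len)) for _ in range(num_heads)]
)
kernel = splash_attention_kernel.make_splash_mha(
    mask=mask, head_shards=1, q_seq_shards=1, block_sizes=block_sizes
)
```

Larger blocks generally trade VMEM pressure for fewer kernel iterations, which is presumably the axis the "improve performance" item was tuned along.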
