meta-llama/Llama-3.1-8B-Instruct: lower gpu_memory_utilization from 0.8 to 0.6 to prevent OOM (ROCm)

dtrifiro · dtrifiro · commit 049aaf153fdd · 2025-05-09T17:01:53.000+02:00
diff --git a/meta-llama/Llama-3.1-8B-Instruct/accuracy/server-rocm.yml b/meta-llama/Llama-3.1-8B-Instruct/accuracy/server-rocm.yml
@@ -1,4 +1,4 @@
 trust-remote-code: true
 tensor-parallel-size: 1
 max-model-len: 16384
-gpu_memory_utilization: 0.8
+gpu_memory_utilization: 0.6