diff --git a/Qwen/Qwen2.5-7B-Instruct/accuracy/server-rocm.yml b/Qwen/Qwen2.5-7B-Instruct/accuracy/server-rocm.yml
new file mode 100644
index 0000000..f2d1abc
--- /dev/null
+++ b/Qwen/Qwen2.5-7B-Instruct/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override (kebab-case key, consistent with the keys above)
+gpu-memory-utilization: 0.6
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/server-rocm.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/server-rocm.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/server-rocm.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/server-rocm.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/server-rocm.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/server-rocm.yml b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/RedHatAI/phi-4-FP8-dynamic/accuracy/server-rocm.yml b/RedHatAI/phi-4-FP8-dynamic/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/RedHatAI/phi-4-FP8-dynamic/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/ibm-granite/granite-3.1-8b-instruct/accuracy/server-rocm.yml b/ibm-granite/granite-3.1-8b-instruct/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/ibm-granite/granite-3.1-8b-instruct/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/meta-llama/Llama-3.1-8B-Instruct/accuracy/server-rocm.yml b/meta-llama/Llama-3.1-8B-Instruct/accuracy/server-rocm.yml
new file mode 100644
index 0000000..ba5cd1c
--- /dev/null
+++ b/meta-llama/Llama-3.1-8B-Instruct/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true
diff --git a/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/server-rocm.yml b/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/server-rocm.yml
new file mode 100644
index 0000000..f2d1abc
--- /dev/null
+++ b/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override (kebab-case key, consistent with the keys above)
+gpu-memory-utilization: 0.6
diff --git a/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/server-rocm.yml b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/server-rocm.yml
new file mode 100644
index 0000000..f2d1abc
--- /dev/null
+++ b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/server-rocm.yml
@@ -0,0 +1,6 @@
+# common/accuracy/server.yml
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 16384
+# ROCm-specific override (kebab-case key, consistent with the keys above)
+gpu-memory-utilization: 0.6
diff --git a/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/server-rocm.yml b/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/server-rocm.yml
new file mode 100644
index 0000000..6df55a1
--- /dev/null
+++ b/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/server-rocm.yml
@@ -0,0 +1,7 @@
+# https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+model: 'mistralai/Mixtral-8x7B-Instruct-v0.1'
+trust-remote-code: true
+tensor-parallel-size: 2
+max-model-len: 16384
+# ROCm-specific override
+enforce-eager: true