diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/server.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/server.yml
new file mode 100644
index 0000000..b3b13eb
--- /dev/null
+++ b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,4 @@
+trust-remote-code: true
+tensor-parallel-size: 8
+max-model-len: 16384
+gpu-memory-utilization: 0.7
diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/tasks.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/tasks.yml
new file mode 100644
index 0000000..0110c12
--- /dev/null
+++ b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/tasks.yml
@@ -0,0 +1,64 @@
+tasks:
+  - name: arc_challenge
+    metrics:
+      - name: acc_norm,none
+        value: 0.6962
+
+  - name: gsm8k
+    metrics:
+      - name: strict_match,none
+        value: 0.8976
+
+  - name: hellaswag
+    metrics:
+      - name: acc_norm,none
+        value: 0.8518
+
+  - name: mmlu
+    metrics:
+      - name: acc,none
+        value: 0.8049
+
+  - name: truthfulqa_mc2
+    metrics:
+      - name: acc,none
+        value: 0.609
+
+  - name: winogrande
+    metrics:
+      - name: acc,none
+        value: 0.7703
+
+  # The following are placeholders for the mid-level "leaderboard_*" tasks
+  # (OpenLLM v2), pending info on how to compute the metric
+  # values from the individual sub-tasks.
+
+  # - name: leaderboard_ifeval
+  #   metrics:
+  #     - name: inst_level_strict_acc,none
+  #       value: 0.8769
+
+  # - name: leaderboard_bbh
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.6501
+
+  # - name: leaderboard_math_hard
+  #   metrics:
+  #     - name: exact_match,none
+  #       value: 0.571
+
+  # - name: leaderboard_gpqa
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.3205
+
+  # - name: leaderboard_musr
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.4312
+
+  # - name: leaderboard_mmlu_pro
+  #   metrics:
+  #     - name: acc,none
+  #       value: 0.556
diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16/accuracy/server.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16/accuracy/server.yml
new file mode 100644
index 0000000..b3b13eb
--- /dev/null
+++ b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,4 @@
+trust-remote-code: true
+tensor-parallel-size: 8
+max-model-len: 16384
+gpu-memory-utilization: 0.7
diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16/accuracy/tasks.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16/accuracy/tasks.yml
new file mode 100644
index 0000000..cca6c33
--- /dev/null
+++ b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16/accuracy/tasks.yml
@@ -0,0 +1,64 @@
+tasks:
+  - name: arc_challenge
+    metrics:
+      - name: acc_norm,none
+        value: 0.6834
+
+  - name: gsm8k
+    metrics:
+      - name: strict_match,none
+        value: 0.909
+
+  - name: hellaswag
+    metrics:
+      - name: acc_norm,none
+        value: 0.8495
+
+  - name: mmlu
+    metrics:
+      - name: acc,none
+        value: 0.8034
+
+  - name: truthfulqa_mc2
+    metrics:
+      - name: acc,none
+        value: 0.613
+
+  - name: winogrande
+    metrics:
+      - name: acc,none
+        value: 0.7711
+
+  # The following are placeholders for the mid-level "leaderboard_*" tasks
+  # (OpenLLM v2), pending info on how to compute the metric
+  # values from the individual sub-tasks.
+
+  # - name: leaderboard_ifeval
+  #   metrics:
+  #     - name: inst_level_strict_acc,none
+  #       value: 0.8647
+
+  # - name: leaderboard_bbh
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.6478
+
+  # - name: leaderboard_math_hard
+  #   metrics:
+  #     - name: exact_match,none
+  #       value: 0.5733
+
+  # - name: leaderboard_gpqa
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.3188
+
+  # - name: leaderboard_musr
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.4259
+
+  # - name: leaderboard_mmlu_pro
+  #   metrics:
+  #     - name: acc,none
+  #       value: 0.5496
diff --git a/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/server.yml b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/server.yml
new file mode 100644
index 0000000..b3b13eb
--- /dev/null
+++ b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/server.yml
@@ -0,0 +1,4 @@
+trust-remote-code: true
+tensor-parallel-size: 8
+max-model-len: 16384
+gpu-memory-utilization: 0.7
diff --git a/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/tasks.yml b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/tasks.yml
index 582addf..65ee00a 100644
--- a/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/tasks.yml
+++ b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/tasks.yml
@@ -1,28 +1,64 @@
-# accuracy configs for https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
 tasks:
-- name: "leaderboard_bbh"
-  metrics:
-  - name: "acc_norm,none"
-    value: 0.50946
-- name: "leaderboard_gpqa"
-  metrics:
-  - name: "acc_norm,none"
-    value: 0.29698
-- name: "leaderboard_ifeval"
-  metrics:
-  - name: "inst_level_loose_acc,none"
-    value: 0.85851
-  - name: "inst_level_strict_acc,none"
-    value: 0.82374
-  - name: "prompt_level_loose_acc,none"
-    value: 0.79667
-  - name: "prompt_level_strict_acc,none"
-    value: 0.74861
-- name: "leaderboard_math_hard"
-  metrics:
-  - name: "exact_match,none"
-    value: 0.19864
-- name: "leaderboard_musr"
-  metrics:
-  - name: "acc_norm,none"
-    value: 0.38359
+  - name: arc_challenge
+    metrics:
+      - name: acc_norm,none
+        value: 0.6937
+
+  - name: gsm8k
+    metrics:
+      - name: strict_match,none
+        value: 0.9045
+
+  - name: hellaswag
+    metrics:
+      - name: acc_norm,none
+        value: 0.8523
+
+  - name: mmlu
+    metrics:
+      - name: acc,none
+        value: 0.8054
+
+  - name: truthfulqa_mc2
+    metrics:
+      - name: acc,none
+        value: 0.6141
+
+  - name: winogrande
+    metrics:
+      - name: acc,none
+        value: 0.779
+
+  # The following are placeholders for the mid-level "leaderboard_*" tasks
+  # (OpenLLM v2), pending info on how to compute the metric
+  # values from the individual sub-tasks.
+
+  # - name: leaderboard_ifeval
+  #   metrics:
+  #     - name: inst_level_strict_acc,none
+  #       value: 0.869
+
+  # - name: leaderboard_bbh
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.6513
+
+  # - name: leaderboard_math_hard
+  #   metrics:
+  #     - name: exact_match,none
+  #       value: 0.5778
+
+  # - name: leaderboard_gpqa
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.3188
+
+  # - name: leaderboard_musr
+  #   metrics:
+  #     - name: acc_norm,none
+  #       value: 0.422
+
+  # - name: leaderboard_mmlu_pro
+  #   metrics:
+  #     - name: acc,none
+  #       value: 0.557
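The commented-out "leaderboard_*" placeholders above are blocked on a sub-task aggregation rule. Below is a minimal sketch of one plausible rule, assuming a sample-count-weighted mean over sub-task scores (an aggregation lm-evaluation-harness supports for task groups); the sub-task scores in the example are hypothetical and do not come from this diff.

```python
# Hypothetical sketch: aggregating sub-task results into one group metric
# for a "leaderboard_*" task, assuming a sample-count-weighted mean.
# Sub-task scores below are invented for illustration; the sample counts
# match the public BBH sub-task sizes but should be re-checked.

def group_metric(subtasks: dict[str, tuple[float, int]]) -> float:
    """Weighted mean of sub-task scores, weighted by sample count."""
    total_samples = sum(n for _, n in subtasks.values())
    return sum(score * n for score, n in subtasks.values()) / total_samples

# Illustrative sub-task results: (score, n_samples) per sub-task.
bbh_subtasks = {
    "leaderboard_bbh_boolean_expressions": (0.86, 250),
    "leaderboard_bbh_causal_judgement": (0.62, 187),
    "leaderboard_bbh_date_understanding": (0.71, 250),
}
print(f"{group_metric(bbh_subtasks):.4f}")  # 0.7401
```

An unweighted mean over sub-tasks is the other obvious candidate; the two disagree whenever sub-task sizes differ (as with causal_judgement above), so the choice should be pinned down before the placeholder values are uncommented.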