diff --git a/Qwen/Qwen2.5-7B-Instruct/accuracy/tasks.yml b/Qwen/Qwen2.5-7B-Instruct/accuracy/tasks.yml index 1b8d8e8..833914d 100644 --- a/Qwen/Qwen2.5-7B-Instruct/accuracy/tasks.yml +++ b/Qwen/Qwen2.5-7B-Instruct/accuracy/tasks.yml @@ -1,31 +1,35 @@ tasks: - name: arc_challenge + rtol: 0.09 metrics: - name: acc_norm,none - value: 0.5939 + value: 0.634 - name: gsm8k + rtol: 0.05 metrics: - name: exact_match,strict-match - value: 0.7976 + value: 0.8036 - name: hellaswag metrics: - name: acc_norm,none - value: 0.8017 + value: 0.8152 - name: mmlu metrics: - name: acc,none - value: 0.7415 + value: 0.7424 - name: truthfulqa_mc2 + rtol: 0.15 metrics: - name: acc,none - value: 0.5637 + value: 0.6476 - name: winogrande + rtol: 0.09 metrics: - name: acc,none - value: 0.7569 + value: 0.7466 diff --git a/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/tasks.yml b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/tasks.yml index a896989..c5f2873 100644 --- a/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/tasks.yml +++ b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/tasks.yml @@ -1,10 +1,12 @@ tasks: - name: arc_challenge + rtol: 0.16 metrics: - name: acc_norm,none value: 0.6314 - name: gsm8k + rtol: 0.06 metrics: - name: exact_match,strict-match value: 0.8006 @@ -25,6 +27,7 @@ tasks: value: 0.6487 - name: winogrande + rtol: 0.07 metrics: - name: acc,none value: 0.7443 diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/tasks.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/tasks.yml index 3b21a6c..0805ecf 100644 --- a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/tasks.yml +++ b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/tasks.yml @@ -1,10 +1,12 @@ tasks: - name: arc_challenge + rtol: 0.15 metrics: - name: acc_norm,none value: 0.6323 - name: gsm8k + rtol: 0.09 metrics: - name: exact_match,strict-match value: 0.8059 @@ -25,6 +27,7 @@ tasks: value: 0.6427 - name: winogrande + rtol: 0.07 metrics: - name: acc,none value: 0.7419 diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/tasks.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/tasks.yml index 6047049..19cbea7 100644 --- a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/tasks.yml +++ b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/tasks.yml @@ -1,10 +1,12 @@ tasks: - name: arc_challenge + rtol: 0.21 metrics: - name: acc_norm,none value: 0.6323 - name: gsm8k + rtol: 0.09 metrics: - name: exact_match,strict-match value: 0.8074 @@ -25,6 +27,7 @@ tasks: value: 0.6458 - name: winogrande + rtol: 0.05 metrics: - name: acc,none value: 0.7482 diff --git a/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/tasks.yml b/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/tasks.yml index da02244..cb78516 100644 --- a/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/tasks.yml +++ b/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/tasks.yml @@ -1,5 +1,6 @@ tasks: - name: arc_challenge + rtol: 0.17 metrics: - name: acc_norm,none value: 0.587 @@ -25,6 +26,7 @@ tasks: value: 0.5548 - name: winogrande + rtol: 0.09 metrics: - name: acc,none value: 0.7601