diff --git a/RedHatAI/phi-4-FP8-dynamic/accuracy/tasks.yml b/RedHatAI/phi-4-FP8-dynamic/accuracy/tasks.yml index 1ae693b..fce8c96 100644 --- a/RedHatAI/phi-4-FP8-dynamic/accuracy/tasks.yml +++ b/RedHatAI/phi-4-FP8-dynamic/accuracy/tasks.yml @@ -1,5 +1,6 @@ tasks: - name: arc_challenge + rtol: 0.16 metrics: - name: acc_norm,none value: 0.6425 @@ -15,6 +16,7 @@ tasks: value: 0.8419 - name: mmlu + rtol: 0.05 metrics: - name: acc,none value: 0.803 @@ -25,6 +27,7 @@ tasks: value: 0.5954 - name: winogrande + rtol: 0.07 metrics: - name: acc,none value: 0.7987 diff --git a/RedHatAI/phi-4-quantized.w4a16/accuracy/tasks.yml b/RedHatAI/phi-4-quantized.w4a16/accuracy/tasks.yml index 0f215e6..3b8c57b 100644 --- a/RedHatAI/phi-4-quantized.w4a16/accuracy/tasks.yml +++ b/RedHatAI/phi-4-quantized.w4a16/accuracy/tasks.yml @@ -1,5 +1,6 @@ tasks: - name: arc_challenge + rtol: 0.14 metrics: - name: acc_norm,none value: 0.6288 @@ -15,6 +16,7 @@ tasks: value: 0.8342 - name: mmlu + rtol: 0.05 metrics: - name: acc,none value: 0.7987 @@ -25,6 +27,7 @@ tasks: value: 0.5918 - name: winogrande + rtol: 0.07 metrics: - name: acc,none value: 0.8074 diff --git a/RedHatAI/phi-4-quantized.w8a8/accuracy/tasks.yml b/RedHatAI/phi-4-quantized.w8a8/accuracy/tasks.yml index b19e1a6..7d612b7 100644 --- a/RedHatAI/phi-4-quantized.w8a8/accuracy/tasks.yml +++ b/RedHatAI/phi-4-quantized.w8a8/accuracy/tasks.yml @@ -1,5 +1,6 @@ tasks: - name: arc_challenge + rtol: 0.16 metrics: - name: acc_norm,none value: 0.6433 @@ -15,6 +16,7 @@ tasks: value: 0.843 - name: mmlu + rtol: 0.05 metrics: - name: acc,none value: 0.8039 @@ -25,6 +27,7 @@ tasks: value: 0.5882 - name: winogrande + rtol: 0.07 metrics: - name: acc,none value: 0.7995 diff --git a/microsoft/phi-4/accuracy/tasks.yml b/microsoft/phi-4/accuracy/tasks.yml index a83d6f0..c34e3e4 100644 --- a/microsoft/phi-4/accuracy/tasks.yml +++ b/microsoft/phi-4/accuracy/tasks.yml @@ -1,5 +1,6 @@ tasks: - name: arc_challenge + rtol: 0.16 metrics: - name: acc_norm,none value: 0.6442 @@ -15,6 +16,7 @@ tasks: value: 0.8437 - name: mmlu + rtol: 0.05 metrics: - name: acc,none value: 0.803 @@ -25,6 +27,7 @@ tasks: value: 0.5937 - name: winogrande + rtol: 0.07 metrics: - name: acc,none value: 0.8058