From 3aaf593bcc0b63707a151e964941fff16b408bd6 Mon Sep 17 00:00:00 2001
From: dkupnicki <97047305+dkupnicki@users.noreply.github.com>
Date: Mon, 14 Oct 2024 18:36:30 +0200
Subject: [PATCH] adjust alpaca and llama reference scores in tests

---
 tests/test_pytorch_models.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py
index 00be4edf..1c44a50b 100644
--- a/tests/test_pytorch_models.py
+++ b/tests/test_pytorch_models.py
@@ -51,7 +51,7 @@ def wrapper(**kwargs):
     @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory")
     @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required")
     def test_llama2_7b(self):
-        f1_ref = 0.349
+        f1_ref = 0.330
         acc = run_process(self.wrapper,
                           {"model_name": "meta-llama/Llama-2-7b-chat-hf", "batch_size": 1, "num_runs": 50,
                            "timeout": None, "dataset_path": self.dataset_path})
@@ -60,7 +60,7 @@ def test_llama2_7b(self):
     @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 200, "too little memory")
     @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required")
     def test_llama2_13b(self):
-        f1_ref = 0.195
+        f1_ref = 0.261
         acc = run_process(self.wrapper,
                           {"model_name": "meta-llama/Llama-2-13b-chat-hf", "batch_size": 1, "num_runs": 50,
                            "timeout": None, "dataset_path": self.dataset_path})
@@ -93,7 +93,7 @@ def test_alpaca(self):
         def wrapper(**kwargs):
             kwargs["q"].put(run_pytorch_fp32(**kwargs)[0])
 
-        exact_match_ref, f1_ref = 0.260, 0.616
+        exact_match_ref, f1_ref = 0.180, 0.548
         acc = run_process(wrapper, {"model_path": self.model_path, "batch_size": 1, "num_runs": 50,
                                     "timeout": None, "dataset_path": self.dataset_path})
         self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95)