From 3aaf593bcc0b63707a151e964941fff16b408bd6 Mon Sep 17 00:00:00 2001 From: dkupnicki <97047305+dkupnicki@users.noreply.github.com> Date: Mon, 14 Oct 2024 18:36:30 +0200 Subject: [PATCH] adjust alpaca and llama reference scores in tests --- tests/test_pytorch_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_pytorch_models.py b/tests/test_pytorch_models.py index 00be4edf..1c44a50b 100644 --- a/tests/test_pytorch_models.py +++ b/tests/test_pytorch_models.py @@ -51,7 +51,7 @@ def wrapper(**kwargs): @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 100, "too little memory") @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") def test_llama2_7b(self): - f1_ref = 0.349 + f1_ref = 0.330 acc = run_process(self.wrapper, {"model_name": "meta-llama/Llama-2-7b-chat-hf", "batch_size": 1, "num_runs": 50, "timeout": None, "dataset_path": self.dataset_path}) @@ -60,7 +60,7 @@ def test_llama2_7b(self): @unittest.skipIf(psutil.virtual_memory().available / 1024 ** 3 < 200, "too little memory") @unittest.skipUnless('_aio_profiler_print' in dir(torch._C), "Ampere optimized PyTorch required") def test_llama2_13b(self): - f1_ref = 0.195 + f1_ref = 0.261 acc = run_process(self.wrapper, {"model_name": "meta-llama/Llama-2-13b-chat-hf", "batch_size": 1, "num_runs": 50, "timeout": None, "dataset_path": self.dataset_path}) @@ -93,7 +93,7 @@ def test_alpaca(self): def wrapper(**kwargs): kwargs["q"].put(run_pytorch_fp32(**kwargs)[0]) - exact_match_ref, f1_ref = 0.260, 0.616 + exact_match_ref, f1_ref = 0.180, 0.548 acc = run_process(wrapper, {"model_path": self.model_path, "batch_size": 1, "num_runs": 50, "timeout": None, "dataset_path": self.dataset_path}) self.assertTrue(acc["exact_match"] / exact_match_ref > 0.95)