@@ -486,7 +486,7 @@ def test_small_model_logits(self):
486
486
# Note: Key 9 is currently set for MI300, but may need future adjustments for H100s,
487
487
# considering differences in hardware processing and potential deviations in output.
488
488
EXPECTED_LOGITS = {
489
- 7 : torch .Tensor ([[0.1670 , 0.1620 , 0.6094 ], [- 0.8906 , - 0.1588 , - 0.6060 ], [0.1572 , 0.1290 , 0.7246 ]]).to (
489
+ 7 : torch .Tensor ([[0.1640 , 0.1621 , 0.6093 ], [- 0.8906 , - 0.1640 , - 0.6093 ], [0.1562 , 0.1250 , 0.7226 ]]).to (
490
490
torch_device
491
491
),
492
492
8 : torch .Tensor ([[0.1631 , 0.1621 , 0.6094 ], [- 0.8906 , - 0.1621 , - 0.6094 ], [0.1572 , 0.1270 , 0.7227 ]]).to (
@@ -499,6 +499,8 @@ def test_small_model_logits(self):
499
499
with torch .no_grad ():
500
500
logits = model (dummy_input ).logits
501
501
502
+ logits = logits .float ()
503
+
502
504
torch .testing .assert_close (
503
505
logits [0 , :3 , :3 ], EXPECTED_LOGITS [self .cuda_compute_capability_major_version ], atol = 1e-3 , rtol = 1e-3
504
506
)
@@ -525,7 +527,7 @@ def test_small_model_logits_batched(self):
525
527
# considering differences in hardware processing and potential deviations in generated text.
526
528
EXPECTED_LOGITS_LEFT = {
527
529
7 : torch .Tensor (
528
- [[0.1750 , 0.0537 , 0.7007 ], [0.1750 , 0.0537 , 0.7007 ], [0.1750 , 0.0537 , 0.7007 ]],
530
+ [[0.1904 , 0.0500 , 0.7187 ], [0.1933 , 0.0515 , 0.7187 ], [0.2001 , 0.0559 , 0.7148 ]],
529
531
).to (torch_device ),
530
532
8 : torch .Tensor ([[0.1914 , 0.0508 , 0.7188 ], [0.1953 , 0.0510 , 0.7227 ], [0.1973 , 0.0562 , 0.7148 ]]).to (
531
533
torch_device
@@ -537,7 +539,7 @@ def test_small_model_logits_batched(self):
537
539
538
540
EXPECTED_LOGITS_LEFT_UNPADDED = {
539
541
7 : torch .Tensor (
540
- [[0.2212 , 0.5200 , - 0.3816 ], [0.8213 , - 0.2313 , 0.6069 ], [0.2664 , - 0.7090 , 0.2468 ]],
542
+ [[0.2236 , 0.5195 , - 0.3828 ], [0.8203 , - 0.2275 , 0.6054 ], [0.2656 , - 0.7070 , 0.2460 ]],
541
543
).to (torch_device ),
542
544
8 : torch .Tensor ([[0.2217 , 0.5195 , - 0.3828 ], [0.8203 , - 0.2295 , 0.6055 ], [0.2676 , - 0.7109 , 0.2461 ]]).to (
543
545
torch_device
@@ -548,7 +550,7 @@ def test_small_model_logits_batched(self):
548
550
}
549
551
550
552
EXPECTED_LOGITS_RIGHT_UNPADDED = {
551
- 7 : torch .Tensor ([[0.2205 , 0.1232 , - 0.1611 ], [- 0.3484 , 0.3030 , - 1.0312 ], [0.0742 , 0.7930 , 0.7969 ]]).to (
553
+ 7 : torch .Tensor ([[0.2167 , 0.1269 , - 0.1640 ], [- 0.3496 , 0.2988 , - 1.0312 ], [0.0688 , 0.7929 , 0.8007 ]]).to (
552
554
torch_device
553
555
),
554
556
8 : torch .Tensor ([[0.2178 , 0.1260 , - 0.1621 ], [- 0.3496 , 0.2988 , - 1.0312 ], [0.0693 , 0.7930 , 0.8008 ]]).to (
@@ -561,6 +563,7 @@ def test_small_model_logits_batched(self):
561
563
562
564
with torch .no_grad ():
563
565
logits = model (dummy_input , attention_mask = attention_mask ).logits
566
+ logits = logits .float ()
564
567
565
568
torch .testing .assert_close (
566
569
logits [0 , :3 , :3 ], EXPECTED_LOGITS_LEFT [self .cuda_compute_capability_major_version ], atol = 1e-3 , rtol = 1e-3
0 commit comments