From 56f2274080b5562e1cbfd452659c5107ca77d1d0 Mon Sep 17 00:00:00 2001
From: Tamme Claus <tamme.claus@rwth-aachen.de>
Date: Tue, 1 Jul 2025 15:02:25 +0200
Subject: [PATCH 1/6] rotate!: reflect change in LinearAlgebra.jl

---
 src/host/linalg.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/host/linalg.jl b/src/host/linalg.jl
index bdfab651..4933839c 100644
--- a/src/host/linalg.jl
+++ b/src/host/linalg.jl
@@ -687,8 +687,8 @@ function LinearAlgebra.rotate!(x::AbstractGPUArray, y::AbstractGPUArray, c::Numb
         i = @index(Global, Linear)
         @inbounds xi = x[i]
         @inbounds yi = y[i]
-        @inbounds x[i] =       c  * xi + s * yi
-        @inbounds y[i] = -conj(s) * xi + c * yi
+        @inbounds x[i] = s*yi +      c *xi
+        @inbounds y[i] = c*yi - conj(s)*xi 
     end
     rotate_kernel!(get_backend(x))(x, y, c, s; ndrange = size(x))
     return x, y

From 320c560c642713f13d7932a4c40aca5b9b9f61af Mon Sep 17 00:00:00 2001
From: Tamme Claus <tamme.claus@rwth-aachen.de>
Date: Thu, 3 Jul 2025 09:54:29 +0200
Subject: [PATCH 2/6] Fix typo in test/testsuite.jl comment

---
 test/testsuite.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/testsuite.jl b/test/testsuite.jl
index df68f31a..4d1b3011 100644
--- a/test/testsuite.jl
+++ b/test/testsuite.jl
@@ -46,7 +46,7 @@ function compare(f, AT::Type{<:AbstractGPUArray}, xs...; kwargs...)
 end
 
 function compare(f, AT::Type{<:Array}, xs...; kwargs...)
-    # no need to actually run this tests: we have nothing to compoare against,
+    # no need to actually run these tests: we have nothing to compare against,
     # and we'll run it on a CPU array anyhow when comparing to a GPU array.
     #
     # this method exists so that we can at least run the test suite with Array,

From 60f2d9ee528b996ef1c239f7455f76870375bf8a Mon Sep 17 00:00:00 2001
From: Tamme Claus <tamme.claus@rwth-aachen.de>
Date: Thu, 3 Jul 2025 10:14:49 +0200
Subject: [PATCH 3/6] Test/compare for false = strong zero

---
 test/testsuite/linalg.jl | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
index 914b06b2..433f7620 100644
--- a/test/testsuite/linalg.jl
+++ b/test/testsuite/linalg.jl
@@ -282,11 +282,19 @@
     @testset "lmul! and rmul!" for (a,b) in [((3,4),(4,3)), ((3,), (1,3)), ((1,3), (3))], T in eltypes
         @test compare(rmul!, AT, rand(T, a), Ref(rand(T)))
         @test compare(lmul!, AT, Ref(rand(T)), rand(T, b))
+        if isfloattype(T)
+            @test compare(rmul!, AT, fill(T(NaN), a), Ref(false))
+            @test compare(lmul!, AT, Ref(false), fill(T(NaN), b))
+        end
     end
 
     @testset "axp{b}y" for T in eltypes
         @test compare(axpby!, AT, Ref(rand(T)), rand(T,5), Ref(rand(T)), rand(T,5))
         @test compare(axpy!, AT, Ref(rand(T)), rand(T,5), rand(T,5))
+        if isfloattype(T)
+            @test compare(axpby!, AT, Ref(false), fill(T(NaN), 5), Ref(false), fill(T(NaN), 5))
+            @test compare(axpy!, AT, Ref(false), fill(T(NaN), 5), rand(T, 5)) # explicitly test "strong zeroness"
+        end
     end
 
     @testset "dot" for T in eltypes
@@ -295,10 +303,16 @@
 
     @testset "rotate!" for T in eltypes
         @test compare(rotate!, AT, rand(T,5), rand(T,5), Ref(rand(real(T))), Ref(rand(T)))
+        if isfloattype(T) && false # skip because the LinAlg.jl change is not released 
+            @test compare(rotate!, AT, fill(T(NaN), 5), fill(T(NaN), 5), Ref(false), Ref(false))
+        end
     end
 
     @testset "reflect!" for T in eltypes
         @test compare(reflect!, AT, rand(T,5), rand(T,5), Ref(rand(real(T))), Ref(rand(T)))
+        if isfloattype(T)
+            @test compare(reflect!, AT, fill(T(NaN), 5), fill(T(NaN), 5), Ref(false), Ref(false))
+        end
     end
 
     @testset "iszero and isone" for T in eltypes
@@ -330,6 +344,11 @@ end
         @test compare(*, AT, f(A), x)
         @test compare(mul!, AT, y, f(A), x)
         @test compare(mul!, AT, y, f(A), x, Ref(T(4)), Ref(T(5)))
+        #TODO: generic_matvecmul! (from LinearAlgebra.jl) does not respect the "strong zero" for Float16
+        if isfloattype(T) && !(T==Float16) && !(T == ComplexF16)
+            y_NaN, A_NaN, x_NaN = fill(T(NaN), 4), fill(T(NaN), 4, 4), fill(T(NaN), 4)
+            @test compare(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(false), Ref(false))
+        end
         @test typeof(AT(rand(T, 3, 3)) * AT(rand(T, 3))) <: AbstractVector
 
         if f !== identity
@@ -348,6 +367,10 @@ end
         @test compare(*, AT, f(A), g(B))
         @test compare(mul!, AT, C, f(A), g(B))
         @test compare(mul!, AT, C, f(A), g(B), Ref(T(4)), Ref(T(5)))
+        if isfloattype(T)
+            A_NaN, B_NaN, C_NaN = fill(T(NaN), 4, 4), fill(T(NaN), 4, 4), fill(T(NaN), 4, 4)
+            @test compare(mul!, AT, C_NaN, f(A_NaN), g(B_NaN), Ref(false), Ref(false))
+        end
         @test typeof(AT(rand(T, 3, 3)) * AT(rand(T, 3, 3))) <: AbstractMatrix
     end
 end

From d49cb2e8ea548e889efe69726d990a46b8053c03 Mon Sep 17 00:00:00 2001
From: Tamme Claus <tamme.claus@rwth-aachen.de>
Date: Sat, 5 Jul 2025 16:38:09 +0200
Subject: [PATCH 4/6] Explicit false/strong zero tests for AbstractGPUArrays

---
 test/testsuite.jl        | 12 ++++++++++++
 test/testsuite/linalg.jl | 28 ++++++++++++++++------------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/test/testsuite.jl b/test/testsuite.jl
index 4d1b3011..e7c8d9d6 100644
--- a/test/testsuite.jl
+++ b/test/testsuite.jl
@@ -54,6 +54,16 @@ function compare(f, AT::Type{<:Array}, xs...; kwargs...)
     return true
 end
 
+has_NaNs(a::AbstractArray) = isfloattype(eltype(a)) && any(isnan, collect(a))
+has_NaNs(as::NTuple) = any(a -> has_NaNs(a), as)
+
+out_has_NaNs(f, AT::Type{<:Array}, xs...) = false # we do not test stdlibs/LinAlg for NaNs (maybe they should?)
+function out_has_NaNs(f, AT::Type{<:AbstractGPUArray}, xs...)
+    arg_in = map(x -> isa(x, Base.RefValue) ? x[] : adapt(AT, x), xs)
+    arg_out = f(arg_in...)
+    return has_NaNs(arg_out)
+end
+   
 # element types that are supported by the array type
 supported_eltypes(AT, test) = supported_eltypes(AT)
 supported_eltypes(AT) = supported_eltypes()
@@ -67,6 +77,8 @@ isrealtype(T) = T <: Real
 iscomplextype(T) = T <: Complex
 isrealfloattype(T) = T <: AbstractFloat
 isfloattype(T) = T <: AbstractFloat || T <: Complex{<:AbstractFloat}
+NaN_T(T::Type{<:AbstractFloat}) = T(NaN)
+NaN_T(T::Type{<:Complex{<:AbstractFloat}}) = T(NaN, NaN)
 
 # list of tests
 const tests = Dict()
diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
index 433f7620..f01e2f44 100644
--- a/test/testsuite/linalg.jl
+++ b/test/testsuite/linalg.jl
@@ -283,8 +283,8 @@
         @test compare(rmul!, AT, rand(T, a), Ref(rand(T)))
         @test compare(lmul!, AT, Ref(rand(T)), rand(T, b))
         if isfloattype(T)
-            @test compare(rmul!, AT, fill(T(NaN), a), Ref(false))
-            @test compare(lmul!, AT, Ref(false), fill(T(NaN), b))
+            @test compare(rmul!, AT, fill(NaN_T(T), a), Ref(false))
+            @test compare(lmul!, AT, Ref(false), fill(NaN_T(T), b))
         end
     end
 
@@ -292,8 +292,8 @@
         @test compare(axpby!, AT, Ref(rand(T)), rand(T,5), Ref(rand(T)), rand(T,5))
         @test compare(axpy!, AT, Ref(rand(T)), rand(T,5), rand(T,5))
         if isfloattype(T)
-            @test compare(axpby!, AT, Ref(false), fill(T(NaN), 5), Ref(false), fill(T(NaN), 5))
-            @test compare(axpy!, AT, Ref(false), fill(T(NaN), 5), rand(T, 5)) # explicitly test "strong zeroness"
+            @test compare(axpby!, AT, Ref(false), fill(NaN_T(T), 5), Ref(false), fill(NaN_T(T), 5))
+            @test compare(axpy!, AT, Ref(false), fill(NaN_T(T), 5), rand(T, 5))
         end
     end
 
@@ -303,15 +303,17 @@
 
     @testset "rotate!" for T in eltypes
         @test compare(rotate!, AT, rand(T,5), rand(T,5), Ref(rand(real(T))), Ref(rand(T)))
-        if isfloattype(T) && false # skip because the LinAlg.jl change is not released 
-            @test compare(rotate!, AT, fill(T(NaN), 5), fill(T(NaN), 5), Ref(false), Ref(false))
+        if isfloattype(T)
+            # skip `compare` until https://github.com/JuliaLang/LinearAlgebra.jl/pull/1323 is released; until then only check the strong zero behaviour of the AbstractGPUArray implementation
+            # @test compare(rotate!, AT, fill(NaN_T(T), 5), fill(NaN_T(T), 5), Ref(false), Ref(false))
+            @test !out_has_NaNs(rotate!, AT, fill(NaN_T(T), 5), fill(NaN_T(T), 5), Ref(false), Ref(false))
         end
     end
 
     @testset "reflect!" for T in eltypes
         @test compare(reflect!, AT, rand(T,5), rand(T,5), Ref(rand(real(T))), Ref(rand(T)))
         if isfloattype(T)
-            @test compare(reflect!, AT, fill(T(NaN), 5), fill(T(NaN), 5), Ref(false), Ref(false))
+            @test compare(reflect!, AT, fill(NaN_T(T), 5), fill(NaN_T(T), 5), Ref(false), Ref(false))
         end
     end
 
@@ -344,10 +346,12 @@ end
         @test compare(*, AT, f(A), x)
         @test compare(mul!, AT, y, f(A), x)
         @test compare(mul!, AT, y, f(A), x, Ref(T(4)), Ref(T(5)))
-        #TODO: generic_matvecmul! (from LinearAlgebra.jl) does not respect the "strong zero" for Float16
-        if isfloattype(T) && !(T==Float16) && !(T == ComplexF16)
-            y_NaN, A_NaN, x_NaN = fill(T(NaN), 4), fill(T(NaN), 4, 4), fill(T(NaN), 4)
-            @test compare(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(false), Ref(false))
+        if isfloattype(T)
+            y_NaN, A_NaN, x_NaN = fill(NaN_T(T), 4), fill(NaN_T(T), 4, 4), fill(NaN_T(T), 4)
+            if !(T==Float16) && !(T == ComplexF16) # skip Float16/ComplexF16 until https://github.com/JuliaLang/LinearAlgebra.jl/issues/1399 is fixed and only check correct strong zero behaviour of AbstractGPUArray
+                @test compare(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(false), Ref(false))
+            end
+            @test !out_has_NaNs(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(false), Ref(false))
         end
         @test typeof(AT(rand(T, 3, 3)) * AT(rand(T, 3))) <: AbstractVector
 
@@ -368,7 +372,7 @@ end
         @test compare(mul!, AT, C, f(A), g(B))
         @test compare(mul!, AT, C, f(A), g(B), Ref(T(4)), Ref(T(5)))
         if isfloattype(T)
-            A_NaN, B_NaN, C_NaN = fill(T(NaN), 4, 4), fill(T(NaN), 4, 4), fill(T(NaN), 4, 4)
+            A_NaN, B_NaN, C_NaN = fill(NaN_T(T), 4, 4), fill(NaN_T(T), 4, 4), fill(NaN_T(T), 4, 4)
             @test compare(mul!, AT, C_NaN, f(A_NaN), g(B_NaN), Ref(false), Ref(false))
         end
         @test typeof(AT(rand(T, 3, 3)) * AT(rand(T, 3, 3))) <: AbstractMatrix

From aaac3642b36b34204a1a84b0701571671b79dc33 Mon Sep 17 00:00:00 2001
From: Tamme Claus <tamme.claus@rwth-aachen.de>
Date: Tue, 8 Jul 2025 11:45:20 +0200
Subject: [PATCH 5/6] =?UTF-8?q?only=20test=20matmat/vec=20for=20strong=20z?=
 =?UTF-8?q?ero=20in=20=CE=B2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/testsuite/linalg.jl | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
index f01e2f44..a593cbd0 100644
--- a/test/testsuite/linalg.jl
+++ b/test/testsuite/linalg.jl
@@ -347,11 +347,8 @@ end
         @test compare(mul!, AT, y, f(A), x)
         @test compare(mul!, AT, y, f(A), x, Ref(T(4)), Ref(T(5)))
         if isfloattype(T)
-            y_NaN, A_NaN, x_NaN = fill(NaN_T(T), 4), fill(NaN_T(T), 4, 4), fill(NaN_T(T), 4)
-            if !(T==Float16) && !(T == ComplexF16) # skip Float16/ComplexF16 until https://github.com/JuliaLang/LinearAlgebra.jl/issues/1399 is fixed and only check correct strong zero behaviour of AbstractGPUArray
-                @test compare(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(false), Ref(false))
-            end
-            @test !out_has_NaNs(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(false), Ref(false))
+            y_NaN, A_NaN, x_NaN = fill(NaN_T(T), 4), rand(T, 4, 4), rand(T, 4)
+            @test compare(mul!, AT, y_NaN, f(A_NaN), x_NaN, Ref(rand(T)), Ref(false))
         end
         @test typeof(AT(rand(T, 3, 3)) * AT(rand(T, 3))) <: AbstractVector
 
@@ -359,7 +356,7 @@ end
             @test compare(mul!, AT, rand(T, 2,2), rand(T, 2,1), f(rand(T, 2)))
         end
     end
-end
+end β
 
 @testsuite "linalg/mul!/matrix-matrix" (AT, eltypes)->begin
     @testset "$T gemm C := $f(A) * $g(B) * a + C * b" for f in (identity, transpose, adjoint), g in (identity, transpose, adjoint), T in eltypes
@@ -372,8 +369,8 @@ end
         @test compare(mul!, AT, C, f(A), g(B))
         @test compare(mul!, AT, C, f(A), g(B), Ref(T(4)), Ref(T(5)))
         if isfloattype(T)
-            A_NaN, B_NaN, C_NaN = fill(NaN_T(T), 4, 4), fill(NaN_T(T), 4, 4), fill(NaN_T(T), 4, 4)
-            @test compare(mul!, AT, C_NaN, f(A_NaN), g(B_NaN), Ref(false), Ref(false))
+            A_NaN, B_NaN, C_NaN = rand(T, 4, 4), rand(T, 4, 4), fill(NaN_T(T), 4, 4)
+            @test compare(mul!, AT, C_NaN, f(A_NaN), g(B_NaN), Ref(rand(T)), Ref(false))
         end
         @test typeof(AT(rand(T, 3, 3)) * AT(rand(T, 3, 3))) <: AbstractMatrix
     end

From ddd26b758d5b8223a36a5e6e30f914a13e4a1332 Mon Sep 17 00:00:00 2001
From: Tamme Claus <tamme.claus@rwth-aachen.de>
Date: Tue, 8 Jul 2025 11:47:47 +0200
Subject: [PATCH 6/6] Remove stray character after `end` in linalg testsuite

---
 test/testsuite/linalg.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
index a593cbd0..b71f43ed 100644
--- a/test/testsuite/linalg.jl
+++ b/test/testsuite/linalg.jl
@@ -356,7 +356,7 @@ end
             @test compare(mul!, AT, rand(T, 2,2), rand(T, 2,1), f(rand(T, 2)))
         end
     end
-end β
+end
 
 @testsuite "linalg/mul!/matrix-matrix" (AT, eltypes)->begin
     @testset "$T gemm C := $f(A) * $g(B) * a + C * b" for f in (identity, transpose, adjoint), g in (identity, transpose, adjoint), T in eltypes