JuliaDiff
diff --git a/Project.toml b/Project.toml
Lines changed: 7 additions & 2 deletions
diff --git a/test/rulesets/Base/array.jl b/test/rulesets/Base/array.jl
Lines changed: 27 additions & 26 deletions
diff --git a/test/rulesets/Base/arraymath.jl b/test/rulesets/Base/arraymath.jl
Lines changed: 38 additions & 35 deletions
diff --git a/test/rulesets/Base/mapreduce.jl b/test/rulesets/Base/mapreduce.jl
Lines changed: 9 additions & 9 deletions
@@ -1,11 +1,12 @@
 name = "ChainRules"
 uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2"
-version = "1.39.2"
+version = "1.40.0"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -18,19 +19,23 @@ ChainRulesCore = "1.15.3"
 ChainRulesTestUtils = "1.5"
 Compat = "3.42.0, 4"
 FiniteDifferences = "0.12.20"
+GPUArraysCore = "0.1.0"
 IrrationalConstants = "0.1.1"
+JLArrays = "0.1"
 JuliaInterpreter = "0.8,0.9"
 RealDot = "0.1"
 StaticArrays = "1.2"
 julia = "1.6"
 
 [extras]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
 JuliaInterpreter = "aa1ae85d-cabe-5617-a682-6adf51b2e16a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["ChainRulesTestUtils", "FiniteDifferences", "JuliaInterpreter", "Random", "StaticArrays", "Test"]
+test = ["Adapt", "ChainRulesTestUtils", "FiniteDifferences", "JLArrays", "JuliaInterpreter", "Random", "StaticArrays", "Test"]
@@ -68,12 +68,12 @@ end
 
 @testset "reshape" begin
     # Forward
-    test_frule(reshape, rand(4, 3), 2, :)
+    @gpu test_frule(reshape, rand(4, 3), 2, :)
     test_frule(reshape, rand(4, 3), axes(rand(6, 2)))
     @test_skip test_frule(reshape, Diagonal(rand(4)), 2, :) # https://github.com/JuliaDiff/ChainRulesTestUtils.jl/issues/239
 
     # Reverse
-    test_rrule(reshape, rand(4, 5), (2, 10))
+    @gpu test_rrule(reshape, rand(4, 5), (2, 10))
     test_rrule(reshape, rand(4, 5), 2, 10)
     test_rrule(reshape, rand(4, 5), 2, :)
     test_rrule(reshape, rand(4, 5), axes(rand(10, 2)))
@@ -98,14 +98,14 @@ end
 
 @testset "permutedims + PermutedDimsArray" begin
     # Forward
-    test_frule(permutedims, rand(5))
-    test_frule(permutedims, rand(3, 4), (2, 1))
+    @gpu test_frule(permutedims, rand(5))
+    @gpu test_frule(permutedims, rand(3, 4), (2, 1))
     test_frule(permutedims!, rand(4,3), rand(3, 4), (2, 1))
     test_frule(PermutedDimsArray, rand(3, 4, 5), (3, 1, 2))
 
     # Reverse
-    test_rrule(permutedims, rand(5))
-    test_rrule(permutedims, rand(3, 4), (2, 1))
+    @gpu test_rrule(permutedims, rand(5))
+    @gpu test_rrule(permutedims, rand(3, 4), (2, 1))
     test_rrule(permutedims, Diagonal(rand(5)), (2, 1))
     # Note BTW that permutedims(Diagonal(rand(5))) does not use the rule at all
 
@@ -127,12 +127,12 @@ end
     test_rrule(repeat, rand(4, ))
     test_rrule(repeat, rand(4, 5))
     test_rrule(repeat, rand(4, 5); fkwargs = (outer=(1,2),))
-    test_rrule(repeat, rand(4, 5); fkwargs = (inner=(1,2), outer=(1,3)))
-    test_rrule(repeat, rand(4, 5); fkwargs = (outer=2,))
+    @gpu_broken test_rrule(repeat, rand(4, 5); fkwargs = (inner=(1,2), outer=(1,3)))
+    @gpu_broken test_rrule(repeat, rand(4, 5); fkwargs = (outer=2,))
 
-    test_rrule(repeat, rand(4, ), 2)
-    test_rrule(repeat, rand(4, 5), 2)
-    test_rrule(repeat, rand(4, 5), 2, 3)
+    @gpu test_rrule(repeat, rand(4, ), 2)
+    @gpu test_rrule(repeat, rand(4, 5), 2)
+    @gpu test_rrule(repeat, rand(4, 5), 2, 3)
     test_rrule(repeat, rand(1,2,3), 2,3,4; check_inferred=VERSION>v"1.6")
     test_rrule(repeat, rand(0,2,3), 2,0,4; check_inferred=VERSION>v"1.6")
     test_rrule(repeat, rand(1,1,1,1), 2,3,4,5; check_inferred=VERSION>v"1.6")
@@ -153,16 +153,16 @@ end
 
     @test rrule(repeat, [1,2,3], 4)[2](ones(12))[2] == [4,4,4]
     @test rrule(repeat, [1,2,3], outer=4)[2](ones(12))[2] == [4,4,4]
-
 end
 
 @testset "hcat" begin
     # forward
-    test_frule(hcat, randn(3, 2), randn(3))
-    test_frule(hcat, randn(), randn(1,3))
+    @gpu test_frule(hcat, randn(3, 2), randn(3))
+    @gpu test_frule(hcat, randn(), randn(1,3))
 
     # reverse
-    test_rrule(hcat, randn(3, 2), randn(3), randn(3, 3))
+    @gpu test_rrule(hcat, randn(3, 2), randn(3), randn(3, 3))
+    @gpu test_rrule(hcat, rand(1,2), rand(), rand(1,3))
     test_rrule(hcat, rand(), rand(1,2), rand(1,2,1))
     test_rrule(hcat, rand(3,1,1,2), rand(3,3,1,2))
 
@@ -194,13 +194,14 @@ end
 end
 
 @testset "vcat" begin
-
     # forward
     test_frule(vcat, randn(), randn(3), rand())
-    test_frule(vcat, randn(3, 1), randn(3))
+    @gpu test_frule(vcat, randn(3), rand(), randn(3))
+    @gpu test_frule(vcat, randn(3, 1), randn(3))
 
     # reverse
-    test_rrule(vcat, randn(2, 4), randn(1, 4), randn(3, 4))
+    @gpu test_rrule(vcat, randn(3), rand(), randn(3))
+    @gpu test_rrule(vcat, randn(2, 4), randn(1, 4), randn(3, 4))
     test_rrule(vcat, rand(), rand())
     test_rrule(vcat, rand(), rand(3), rand(3,1,1))
     test_rrule(vcat, rand(3,1,2), rand(4,1,2))
@@ -230,8 +231,8 @@ end
     test_frule(cat, rand(), rand(2,3); fkwargs=(dims=(1,2),))
 
     # reverse
-    test_rrule(cat, rand(2, 4), rand(1, 4); fkwargs=(dims=1,))
-    test_rrule(cat, rand(2, 4), rand(2); fkwargs=(dims=Val(2),))
+    @gpu test_rrule(cat, rand(2, 4), rand(1, 4); fkwargs=(dims=1,))
+    @gpu test_rrule(cat, rand(2, 4), rand(2); fkwargs=(dims=Val(2),))
     test_rrule(cat, rand(), rand(2, 3); fkwargs=(dims=[1,2],))
     test_rrule(cat, rand(1), rand(3, 2, 1); fkwargs=(dims=(1,2),), check_inferred=false) # infers Tuple{Zero, Vector{Float64}, Any}
 
@@ -263,7 +264,7 @@ end
     end
     @testset "Array" begin
         # Forward
-        test_frule(reverse, rand(5))
+        @gpu_broken test_frule(reverse, rand(5))
         test_frule(reverse, rand(5), 2, 4)
         test_frule(reverse, rand(5), fkwargs=(dims=1,))
         test_frule(reverse, rand(3,4), fkwargs=(dims=2,))
@@ -275,7 +276,7 @@ end
         test_frule(reverse!, rand(3,4), fkwargs=(dims=2,))
 
         # Reverse
-        test_rrule(reverse, rand(5))
+        @gpu_broken test_rrule(reverse, rand(5))
         test_rrule(reverse, rand(5), 2, 4)
         test_rrule(reverse, rand(5), fkwargs=(dims=1,))
 
@@ -293,15 +294,15 @@ end
 
 @testset "circshift" begin
     # Forward
-    test_frule(circshift, rand(10), 1)
+    @gpu test_frule(circshift, rand(10), 1)
     test_frule(circshift, rand(10), (1,))
     test_frule(circshift, rand(3,4), (-7,2))
 
     test_frule(circshift!, rand(10), rand(10), 1)
     test_frule(circshift!, rand(3,4), rand(3,4), (-7,2))
 
     # Reverse
-    test_rrule(circshift, rand(10), 1)
+    @gpu test_rrule(circshift, rand(10), 1)
     test_rrule(circshift, rand(10) .+ im, -2)
     test_rrule(circshift, rand(10), (1,))
     test_rrule(circshift, rand(3,4), (-7,2))
@@ -379,14 +380,14 @@ end
     # Forward
     test_frule(imum, rand(10))
     test_frule(imum, rand(3,4))
-    test_frule(imum, rand(3,4), fkwargs=(dims=1,))
+    @gpu_broken test_frule(imum, rand(3,4), fkwargs=(dims=1,))
     test_frule(imum, [rand(2) for _ in 1:3])
     test_frule(imum, [rand(2) for _ in 1:3, _ in 1:4]; fkwargs=(dims=1,))
 
     # Reverse
     test_rrule(imum, rand(10))
     test_rrule(imum, rand(3,4))
-    test_rrule(imum, rand(3,4), fkwargs=(dims=1,))
+    @gpu_broken test_rrule(imum, rand(3,4), fkwargs=(dims=1,))
     test_rrule(imum, rand(3,4,5), fkwargs=(dims=(1,3),))
 
     # Arrays of arrays
 
@@ -1,21 +1,26 @@
 @testset "arraymath.jl" begin
     @testset "inv(::Matrix{$T})" for T in (Float64, ComplexF64)
         B = generate_well_conditioned_matrix(T, 3)
-        test_frule(inv, B)
-        test_rrule(inv, B)
+        if VERSION >= v"1.7"  # on older Julia the GPU variants are marked broken (else branch)
+            @gpu test_frule(inv, B)
+            @gpu test_rrule(inv, B)
+        else
+            @gpu_broken test_frule(inv, B)
+            @gpu_broken test_rrule(inv, B)
+        end
     end
 
     @testset "*: $T" for T in (Float64, ComplexF64)
         ⋆(a) = round.(5*randn(T, a))  # Helper to generate nice random values
         ⋆(a, b) = ⋆((a, b))  # matrix
         ⋆() = only(⋆(()))  # scalar
 
-        @testset "Scalar-Array $dims" for dims in ((3,), (5,4), (2, 3, 4, 5))
-            test_frule(*, ⋆(), ⋆(dims))
-            test_frule(*, ⋆(dims), ⋆())
+        @testset "Scalar-Array $dims" for dims in ((3,), (2, 3, 4))
+            @gpu test_frule(*, ⋆(), ⋆(dims))
+            @gpu test_frule(*, ⋆(dims), ⋆())
 
-            test_rrule(*, ⋆(), ⋆(dims))
-            test_rrule(*, ⋆(dims), ⋆())
+            @gpu test_rrule(*, ⋆(), ⋆(dims))
+            @gpu test_rrule(*, ⋆(dims), ⋆())
         end
 
         @testset "AbstractMatrix-AbstractVector n=$n, m=$m" for n in (2, 3), m in (4, 5)
@@ -60,41 +65,39 @@
 
         @testset "Diagonal" begin
             # fwd
-            test_frule(*, Diagonal([1.0, 2.0, 3.0]), Diagonal([4.0, 5.0, 6.0]))
-            test_frule(*, Diagonal([1.0, 2.0, 3.0]), rand(3))
+            @gpu test_frule(*, Diagonal([1.0, 2.0, 3.0]), Diagonal([4.0, 5.0, 6.0]))
+            @gpu test_frule(*, Diagonal([1.0, 2.0, 3.0]), rand(3))
 
             # rev
-            test_rrule(*, Diagonal([1.0, 2.0, 3.0]), Diagonal([4.0, 5.0, 6.0]))
-            test_rrule(*, Diagonal([1.0, 2.0, 3.0]), rand(3))
+            @gpu test_rrule(*, Diagonal([1.0, 2.0, 3.0]), Diagonal([4.0, 5.0, 6.0]))
+            @gpu test_rrule(*, Diagonal([1.0, 2.0, 3.0]), rand(3))
 
             # Needs to not try and inplace, as `mul!` will do wrong.
             # see https://github.com/JuliaDiff/ChainRulesCore.jl/issues/411
-            test_rrule(*, Diagonal([1.0, 2.0, 3.0]), rand(3,3))
+            @gpu test_rrule(*, Diagonal([1.0, 2.0, 3.0]), rand(3,3))
         end
 
-        @testset "Covector * Vector n=$n" for n in (3, 5)
-            @testset "$f" for f in (adjoint, transpose)
-                # This should be same as dot product and give a scalar
-                test_rrule(*, f(⋆(n)) ⊢ f(⋆(n)), ⋆(n))
-            end
+        @testset "$adj * Vector" for adj in (adjoint, transpose)
+            # This should be same as dot product and give a scalar
+            test_rrule(*, adj(⋆(5)) ⊢ adj(⋆(5)), ⋆(5))
         end
     end
 
     @testset "muladd: $T" for T in (Float64, ComplexF64)
-        @testset "add $(typeof(z))" for z in [rand(T), rand(T, 3), rand(T, 3, 3), false]
+        @testset "add $(typeof(z))" for z in [rand(), rand(T, 3), rand(T, 3, 3), false]
             @testset "forward mode" begin
-                test_frule(muladd, rand(T, 3, 5), rand(T, 5, 3), z)
+                @gpu test_frule(muladd, rand(T, 3, 5), rand(T, 5, 3), z)
             end
             @testset "matrix * matrix" begin
                 A = rand(T, 3, 3)
                 B = rand(T, 3, 3)
-                test_rrule(muladd, A, B, z)
-                test_rrule(muladd, A', B, z)
-                test_rrule(muladd, A , B', z)
+                @gpu test_rrule(muladd, A, B, z)
+                @gpu test_rrule(muladd, A', B, z)
+                @gpu test_rrule(muladd, A , B', z)
 
                 C = rand(T, 3, 5)
                 D = rand(T, 5, 3)
-                test_rrule(muladd, C, D, z)
+                @gpu test_rrule(muladd, C, D, z)
             end
             if ndims(z) <= 1
                 @testset "matrix * vector" begin
@@ -181,32 +184,32 @@
     @testset "/ and \\ Scalar-AbstractArray" begin
         A = round.(10 .* randn(3, 4, 5), digits=1)
         # fwd
-        test_frule(/, A, 7.2)
-        test_frule(\, 7.2, A)
+        @gpu test_frule(/, A, 7.2)
+        @gpu test_frule(\, 7.2, A)
         # rev
-        test_rrule(/, A, 7.2)
-        test_rrule(\, 7.2, A)
+        @gpu test_rrule(/, A, 7.2)
+        @gpu test_rrule(\, 7.2, A)
 
         C = round.(10 .* randn(6) .+ im .* 10 .* randn(6), digits=1)
-        test_rrule(/, C, 7.2+8.3im)
-        test_rrule(\, 7.2+8.3im, C)
+        @gpu test_rrule(/, C, 7.2+8.3im)
+        @gpu test_rrule(\, 7.2+8.3im, C)
     end
 
     @testset "negation" begin
         A = randn(4, 4)
         Ā = randn(4, 4)
         # fwd
-        test_frule(-, A)
+        @gpu test_frule(-, A)
         # rev
-        test_rrule(-, A)
-        test_rrule(-, Diagonal(A); output_tangent=Diagonal(Ā))
+        @gpu test_rrule(-, A)
+        @gpu test_rrule(-, Diagonal(A); output_tangent=Diagonal(Ā))
     end
 
     @testset "addition" begin
         # fwd
-        test_frule(+, randn(2), randn(2), randn(2))
+        @gpu test_frule(+, randn(2), randn(2), randn(2))
         # rev
-        test_rrule(+, randn(4, 4), randn(4, 4), randn(4, 4))
-        test_rrule(+, randn(3), randn(3,1), randn(3,1,1))
+        @gpu test_rrule(+, randn(4, 4), randn(4, 4), randn(4, 4))
+        @gpu test_rrule(+, randn(3), randn(3,1), randn(3,1,1))
     end
 end
@@ -15,12 +15,12 @@ const CFG = ChainRulesTestUtils.ADviaRuleConfig()
     end
     @testset "sum(x; dims=$dims)" for dims in (:, 2, (1,3))
         # Forward
-        test_frule(sum, rand(5); fkwargs=(;dims=dims))
-        test_frule(sum, rand(ComplexF64, 2,3,4); fkwargs=(;dims=dims))
+        @gpu test_frule(sum, rand(5); fkwargs=(;dims=dims))
+        @gpu test_frule(sum, rand(ComplexF64, 2,3,4); fkwargs=(;dims=dims))
 
         # Reverse
-        test_rrule(sum, rand(5); fkwargs=(;dims=dims))
-        test_rrule(sum, rand(ComplexF64, 2,3,4); fkwargs=(;dims=dims))
+        @gpu test_rrule(sum, rand(5); fkwargs=(;dims=dims))
+        @gpu test_rrule(sum, rand(ComplexF64, 2,3,4); fkwargs=(;dims=dims))
 
         # Structured matrices
         test_rrule(sum, rand(5)'; fkwargs=(;dims=dims))
@@ -58,8 +58,8 @@ const CFG = ChainRulesTestUtils.ADviaRuleConfig()
         @testset "dims = $dims" for dims in (:, 1)
             @testset "Array{$N, $T}" for N in eachindex(sizes), T in (Float64, ComplexF64)
                 x = randn(T, sizes[1:N]...)
-                test_frule(sum, abs2, x; fkwargs=(;dims=dims))
-                test_rrule(sum, abs2, x; fkwargs=(;dims=dims))
+                @gpu test_frule(sum, abs2, x; fkwargs=(;dims=dims))
+                @gpu test_rrule(sum, abs2, x; fkwargs=(;dims=dims))
             end
 
             # Boolean -- via @non_differentiable, test that this isn't ambiguous
@@ -156,10 +156,10 @@ const CFG = ChainRulesTestUtils.ADviaRuleConfig()
                 ((3,4), 1), ((3,4), 2), ((3,4), :), ((3,4), [1,2]),
                 ((3,4,1), 1), ((3,2,2), 3), ((3,2,2), 2:3),
                 ]
-                x = randn(T, sz)
-                test_rrule(prod, x; fkwargs=(dims=dims,), check_inferred=true)
+                x = rand(T, sz) .+ 1  # no zeros
+                @gpu test_rrule(prod, x; fkwargs=(dims=dims,), check_inferred=true)
                 x[1] = 0
-                test_rrule(prod, x; fkwargs=(dims=dims,), check_inferred=true)
+                @gpu_broken test_rrule(prod, x; fkwargs=(dims=dims,), check_inferred=true)
                 x[5] = 0
                 test_rrule(prod, x; fkwargs=(dims=dims,), check_inferred=true)
                 x[3] = x[7] = 0  # two zeros along some slice, for any dims