fixup tuplecast

mcabbott · mcabbott · commit ec4172423e99 · 2022-07-12T08:56:21.000-04:00
diff --git a/src/tuplecast.jl b/src/tuplecast.jl
@@ -26,28 +26,36 @@ function tuplecast(f::F, args...) where {F}
         T <: Tuple || throw(ArgumentError("""tuplecast(f, args) only works on functions returning a tuple,
             but f = $(sprint(show, f)) returns type T = $T"""))
     end
+    # TODO allow GPU arrays, possibly just as a fallback unzip, but see also: 
+    #   https://github.com/JuliaArrays/StructArrays.jl/issues/150
     # if any(a -> a isa CuArray, args)
     #     return unzip(broadcast(f, args...))
     # end
     bc = Broadcast.instantiate(Broadcast.broadcasted(f, args...))
-    StructArrays.components(StructArray(bc))
+    if Broadcast.BroadcastStyle(typeof(bc)) isa Broadcast.AbstractArrayStyle
+        return StructArrays.components(StructArray(bc))
+    else
+        return unzip(broadcast(f, args...))  # e.g. tuples
+    end
 end
 
 function ChainRulesCore.rrule(cfg::RuleConfig{>:HasReverseMode}, ::typeof(tuplecast), f::F, args...) where {F}
-    y, back = rrule_via_ad(cfg, broadcasted, f, args...)
+    y, back = rrule_via_ad(cfg, broadcast, f, args...)
     z = unzip(y)
     function untuplecast(dz)
-        dy = StructArray(map(unthunk, dz))
+        # dy = StructArray(map(unthunk, dz))  # fails for e.g. StructArray(([1,2,3], ZeroTangent()))
+        dy = broadcast(tuple, map(unthunk, dz)...)
         db, df, dargs... = back(dy)
-        (db, sum(df), map(unbroadcast, args, dargs)...)
+        return (db, sum(df), map(unbroadcast, args, dargs)...)
     end
+    untuplecast(dz::AbstractZero) = (NoTangent(), NoTangent(), map(Returns(dz), args)...)  # splat: one zero per arg
     return z, untuplecast
 end
 
-# function rrule(cfg::RCR, ::typeof(collect∘tuplecast), f, args...)
-#     y, back = rrule(cfg, tuplecast, f, args...)
-#     return collect(y), back
-# end
+function rrule(cfg::RCR, ::typeof(collect∘tuplecast), f, args...)  # for testing, but doesn't work?
+    y, back = rrule(cfg, tuplecast, f, args...)
+    return collect(y), back
+end
 
 """
     tuplemap(f, args...)
@@ -64,18 +72,19 @@ function tuplemap(f::F, args...) where {F}
     # if any(a -> a isa CuArray, args)
     #     return unzip(map(f, args...))
     # end
-    StructArrays.components(StructArray(Iterators.map(f, args...)))
+    return StructArrays.components(StructArray(Iterators.map(f, args...)))
 end
 
-# function ChainRulesCore.rrule(cfg::RuleConfig{>:HasReverseMode}, ::typeof(tuplemap), f::F, args...) where {F}
-#     y, back = rrule(cfg, map, f, xs...)  # won't work, but also, you want the lazier fwd
-#     z = unzip(y)
-#     function untuplemap(dz)
-#         dy = StructArray(map(unthunk, dz))
-#         back(dy)
-#     end
-#     return unzip(xs), untuplemap
-# end
+function ChainRulesCore.rrule(cfg::RuleConfig{>:HasReverseMode}, ::typeof(tuplemap), f::F, xs...) where {F}
+    y, back = rrule_via_ad(cfg, map, f, xs...)  # differentiate the eager `map`; its result is unzipped below
+    z = unzip(y)
+    function untuplemap(dz)
+        # dy = StructArray(map(unthunk, dz))  # fails for e.g. StructArray(([1,2,3], ZeroTangent()))
+        dy = broadcast(tuple, map(unthunk, dz)...)  # re-zip the unthunked component cotangents into tuples
+        return back(dy)
+    end
+    return z, untuplemap
+end
 
 """
     unzip(A)
@@ -84,8 +93,8 @@ Converts an array of tuples into a tuple of arrays.
 Eager. Will work by `reinterpret` when possible.
 
 ```jldoctest
-julia> ChainRules.unzip([(1,2), (3,4), (5,6)])  # makes two new Arrays:
-([1, 3, 5], [2, 4, 6])
+julia> ChainRules.unzip([(1,2), (30,40), (500,600)])  # makes two new Arrays:
+([1, 30, 500], [2, 40, 600])
 
 julia> typeof(ans)
 Tuple{Vector{Int64}, Vector{Int64}}
@@ -102,7 +111,7 @@ function unzip(xs::AbstractArray)
     x1 = first(xs)
     x1 isa Tuple || throw(ArgumentError("unzip only accepts arrays of tuples"))
     N = length(x1)
-    unzip(xs, Val(N))  # like Zygote's unzip, here this is the fallback case.
+    return unzip(xs, Val(N))  # like Zygote's unzip, here this is the fallback case.
 end
 
 @generated function unzip(xs, ::Val{N}) where {N}
@@ -122,16 +131,44 @@ unzip(xs::AbstractArray{Tuple{T}}) where {T} = (reinterpret(T, xs),)  # best cas
     Expr(:tuple, each...)
 end
 
+"""
+    unzip(t)
+
+Also works on a tuple of tuples:
+
+```jldoctest
+julia> ChainRules.unzip(((1,2), (30,40), (500,600)))
+((1, 30, 500), (2, 40, 600))
+```
+"""
+function unzip(xs::Tuple)
+    head = first(xs)  # only the first element is inspected to validate and size the result
+    head isa Tuple || throw(ArgumentError("unzip only accepts arrays or tuples of tuples"))
+    return ntuple(k -> map(Get(k), xs), length(head))
+end
+
 struct Get{i} end
 Get(i) = Get{Int(i)}()
 (::Get{i})(x) where {i} = x[i]
 
 function ChainRulesCore.rrule(::typeof(unzip), xs::AbstractArray{T}) where {T <: Tuple}
     function rezip(dy)
-        dxs = map(unthunk.(dy)...) do ys...
-            Tangent{T}(ys...)
+        dxs = broadcast(xs, unthunk.(dy)...) do x, ys...
+            ProjectTo(x)(Tangent{T}(ys...))
         end
-        (NoTangent(), dxs)
+        return (NoTangent(), dxs)
     end
+    rezip(dz::AbstractZero) = (NoTangent(), dz)
     return unzip(xs), rezip
 end
+
+function ChainRulesCore.rrule(::typeof(unzip), xs::Tuple)  # reverse rule for `unzip` on a tuple input
+    function rezip_2(dy)  # pullback: `dy` supplies one cotangent per component of the unzipped output
+        dxs = broadcast(xs, unthunk.(dy)...) do x, ys...
+            Tangent{typeof(x)}(ys...)  # gather matching entries into a tangent for this inner tuple
+        end
+        return (NoTangent(), ProjectTo(xs)(dxs))
+    end
+    rezip_2(dz::AbstractZero) = (NoTangent(), dz)  # a zero cotangent is passed through unchanged
+    return unzip(xs), rezip_2
+end
diff --git a/test/tuplecast.jl b/test/tuplecast.jl
@@ -1,8 +1,8 @@
 
-using ChainRules: tuplecast, unzip  # tuplemap, 
+using ChainRules: tuplecast, unzip, tuplemap
 
 @testset "tuplecast.jl" begin
-    @testset "basics: $(sprint(show, fun))" for fun in [tuplecast, unzip∘broadcast] # [tuplemap, tuplecast, unzip∘map, unzip∘broadcast]
+    @testset "basics: $(sprint(show, fun))" for fun in [tuplemap, tuplecast, unzip∘map, unzip∘broadcast]
         @test_throws Exception fun(sqrt, 1:3)
 
         @test fun(tuple, 1:3, 4:6) == ([1, 2, 3], [4, 5, 6])
@@ -16,32 +16,69 @@ using ChainRules: tuplecast, unzip  # tuplemap,
         else
             @test fun(tuple, [1,2,3], [4 5]) == ([1 1; 2 2; 3 3], [4 5; 4 5; 4 5])
         end
+        
+        if fun == tuplemap
+            @test_broken fun(tuple, (1,2,3), (4,5,6)) == ((1, 2, 3), (4, 5, 6))
+        elseif fun == unzip∘map
+            @test fun(tuple, (1,2,3), (4,5,6)) == ((1, 2, 3), (4, 5, 6))
+        else
+            @test fun(tuple, (1,2,3), (4,5,6)) == ((1, 2, 3), (4, 5, 6))
+            @test fun(tuple, (1,2,3), (7,)) == ((1, 2, 3), (7, 7, 7))
+            @test fun(tuple, (1,2,3), 8) == ((1, 2, 3), (8, 8, 8))
+        end
+        @test fun(tuple, (1,2,3), [4,5,6]) == ([1, 2, 3], [4, 5, 6])  # mix tuple & vector
     end
+    
+    @testset "rrules" begin
+        # These exist to allow for second derivatives
 
-    # tuplemap(tuple, (1,2,3), (4,5,6)) == ([1, 2, 3], [4, 5, 6])
+        # test_rrule(collect∘tuplecast, tuple, [1,2,3.], [4,5,6.], check_inferred=false) # return type Tuple{NoTangent, NoTangent, Vector{Float64}, Vector{Float64}} does not match inferred return type NTuple{4, Any}
+
+        y1, bk1 = rrule(CFG, tuplecast, tuple, [1,2,3.0], [4,5,6.0])
+        @test y1 == ([1, 2, 3], [4, 5, 6])
+        @test bk1(([1,10,100.0], [7,8,9.0]))[3] ≈ [1,10,100]
+        
+        # bk1(([1,10,100.0], NoTangent()))  # DimensionMismatch in FiniteDifferences
+        
+        y2, bk2 = rrule(CFG, tuplecast, tuple, [1,2,3.0], [4 5.0], 6.0)
+        @test y2 == ([1 1; 2 2; 3 3], [4 5; 4 5; 4 5], [6 6; 6 6; 6 6])
+        @test bk2(y2)[5] ≈ 36
 
+        y4, bk4 = rrule(CFG, tuplemap, tuple, [1,2,3.0], [4,5,6.0])
+        @test y4 == ([1, 2, 3], [4, 5, 6])
+        @test bk4(([1,10,100.0], [7,8,9.0]))[3] ≈ [1,10,100]
+    end
+    
     @testset "unzip" begin
         @test unzip([(1,2), (3,4), (5,6)]) == ([1, 3, 5], [2, 4, 6])
+        @test unzip(Any[(1,2), (3,4), (5,6)]) == ([1, 3, 5], [2, 4, 6])
+        
         @test unzip([(nothing,2), (3,4), (5,6)]) == ([nothing, 3, 5], [2, 4, 6])
         @test unzip([(missing,2), (missing,4), (missing,6)])[2] isa Base.ReinterpretArray
 
+        @test unzip([(1,), (3,), (5,)]) == ([1, 3, 5],)
+        @test unzip([(1,), (3,), (5,)])[1] isa Base.ReinterpretArray
+        
+        @test unzip(((1,2), (3,4), (5,6))) == ((1, 3, 5), (2, 4, 6))
+
+        # test_rrule(unzip, [(1,2), (3,4), (5.0,6.0)], check_inferred=false)  # DimensionMismatch: second dimension of A, 6, does not match length of x, 2
+
         y, bk = rrule(unzip, [(1,2), (3,4), (5,6)])
         @test y == ([1, 3, 5], [2, 4, 6])
         @test bk(Tangent{Tuple}([1,1,1], [10,100,1000]))[2] isa Vector{<:Tangent{<:Tuple}}
-    end
-    
-    @testset "rrules" begin
-        # These exist to allow for second derivatives
         
-        # test_rrule(collect∘tuplecast, tuple, [1,2,3.], [4,5,6.], check_inferred=false)
-        y1, bk1 = rrule(CFG, tuplecast, tuple, [1,2,3.0], [4,5,6.0])
-        @test y1 == ([1, 2, 3], [4, 5, 6])
-        @test bk1(([1,10,100.0], [7,8,9.0]))[3] ≈ [1,10,100]
+        y3, bk3 = rrule(unzip, [(1,ZeroTangent()), (3,ZeroTangent()), (5,ZeroTangent())])
+        @test y3 == ([1, 3, 5], [ZeroTangent(), ZeroTangent(), ZeroTangent()])
+        dx3 = bk3(Tangent{Tuple}([1,1,1], [10,100,1000]))[2]
+        @test dx3 isa Vector{<:Tangent{<:Tuple}}
+        @test Tuple(dx3[1]) == (1.0, NoTangent())
         
-        y2, bk2 = rrule(CFG, tuplecast, tuple, [1,2,3.0], [4 5.0], 6.0)
-        @test y2 == ([1 1; 2 2; 3 3], [4 5; 4 5; 4 5], [6 6; 6 6; 6 6])
-        @test bk2(y2)[5] ≈ 36
-
-        test_rrule(unzip, [(1.0, 2.0), (3.0, 4.0), (5.0, 6.0)], check_inferred=false)
+        y5, bk5 = rrule(unzip, ((1,2), (3,4), (5,6)))
+        @test y5 == ((1, 3, 5), (2, 4, 6))
+        @test bk5(y5)[2] isa Tangent{<:Tuple}
+        @test Tuple(bk5(y5)[2][2]) == (3, 4)
+        dx5 = bk5(((1,10,100), ZeroTangent()))
+        @test dx5[2] isa Tangent{<:Tuple}
+        @test Tuple(dx5[2][2]) == (10, ZeroTangent())
     end
 end