Commit 85252bf

fixup
1 parent: 6370374

9 files changed, +98 −98 lines changed

docs/src/models/basics.md (1 addition, 1 deletion)

@@ -216,7 +216,7 @@ m(5) # => 26
 Flux provides a set of helpers for custom layers, which you can enable by calling
 
 ```julia
-Flux.@functor Affine
+Flux.@layer Affine
 ```
 
 This enables a useful extra set of functionality for our `Affine` layer, such as [collecting its parameters](../training/optimisers.md) or [moving it to the GPU](../gpu.md).
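For context, a minimal sketch of what that one-line change affects, assuming an `Affine` struct roughly like the one defined earlier on that docs page (the field names `W`, `b` and the constructor here are illustrative, not quoted from the page):

```julia
using Flux

struct Affine
    W
    b
end

Affine(in::Integer, out::Integer) = Affine(randn(Float32, out, in), zeros(Float32, out))

(m::Affine)(x) = m.W * x .+ m.b

Flux.@layer Affine    # previously spelled Flux.@functor Affine

a = Affine(3, 2)
flat, re = Flux.destructure(a)   # parameters are now visible to Flux
length(flat)                     # 8 = 6 weights + 2 biases
```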

src/Flux.jl (1 addition, 0 deletions)

@@ -7,6 +7,7 @@ using MacroTools: @forward
 
 @reexport using NNlib
 using MLUtils
+const stack = MLUtils.stack # now exported by Base
 import Optimisers: Optimisers, trainable, destructure # before v0.13, Flux owned these functions
 
 using Zygote, ChainRulesCore
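The new `const` pins the name because Julia 1.9 also exports `Base.stack`; a small illustration of the behaviour being kept (qualified call used here so it stays unambiguous in any session):

```julia
using MLUtils

# In Julia >= 1.9 both Base and MLUtils export `stack`, so Flux binds the name
# to the MLUtils version; a qualified call shows the semantics it keeps:
MLUtils.stack([[1, 2], [3, 4]]; dims=1)   # 2×2 Matrix: [1 2; 3 4]
```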

src/layers/basic.jl (1 addition, 1 deletion)

@@ -338,7 +338,7 @@ struct SkipConnection{T,F}
   connection::F  #user can pass arbitrary connections here, such as (a,b) -> a + b
 end
 
-@layer SkipConnection # should this be expand?
+@layer :expand SkipConnection
 
 function (skip::SkipConnection)(input)
   skip.connection(skip.layers(input), input)
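Switching `SkipConnection` to `:expand` only changes how it prints; a rough sketch of the difference (output formatting is approximate):

```julia
using Flux

sc = SkipConnection(Dense(2 => 2), +)

# With plain `@layer SkipConnection`, the REPL display stays on one line, roughly:
#   SkipConnection(Dense(2 => 2), +)
# With `@layer :expand SkipConnection`, the 3-arg `show` unfolds the contents, roughly:
#   SkipConnection(
#     Dense(2 => 2),                      # 6 parameters
#     +,
#   )
display(sc)
```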

src/layers/macro.jl (48 additions, 71 deletions)

@@ -3,25 +3,50 @@
     @layer Dense
     @layer :expand Chain
     @layer BatchNorm trainable=(β,γ)
-    @layer Struct functor=(α,β) trainable=(β,)
+    @layer Struct children=(α,β) trainable=(β,)
 
 This macro replaces most uses of `@functor` in Flux 0.14. Its basic purpose is the same:
 When you define a new layer, this tells Flux to explore inside it
 to see the parameters it trains, and also to move them to the GPU, change precision, etc.
 
 Some "keywords" allow control of the recursion:
 * If some fields look like parameters but should not be trained,
-  then `Optimisers.trainable` lets you specify fields to include, and ignore the rest.
-* We can likewise add restructions to `Functors.functor`, but not yet written.
-* In fact you can provide an arbitrary keyword with this syntax, and it will
-  overload this function alla `trainable`... that might be a terrible idea.
+  then `trainable` lets you specify fields to include, and ignore the rest.
+* We can likewise add restrictions to Functors's `children`,
+  but this is not yet written (as this is seldom a good idea).
 
 It also handles overloads of `show` for pretty printing.
 * By default, it adds methods to 3-arg `Base.show` to treat your layer much like `Dense` or `Conv`.
 * If your layer is a container, more like `Chain` or `Parallel`, then `:expand` makes `show` unfold its contents.
-* To disable all `show` overloads, maybe we want a `:ignore` option too.
+* To disable all `show` overloads, there is an `:ignore` option too.
 
 (You probably still want to define 2-arg `show(io::IO, x::Layer)`, the macro does not touch this.)
+
+Note that re-running the macro with different options does not overwrite all methods; you will need to restart.
+
+# Example
+```jldoctest
+julia> struct Trio; a; b; c end
+
+julia> tri = Trio(Dense([1.1 2.2],), Dense([3.3;;], false), Dropout(0.4))
+Trio(Dense(1 => 1, tanh), Dense(1 => 1; bias=false), Dropout(0.4))
+
+julia> Flux.destructure(tri)  # parameters not visible to Flux
+(Bool[], Restructure(Trio, ..., 0))
+
+julia> Flux.@layer :expand Trio
+
+julia> Flux.destructure(tri)  # now gpu, train!, etc will see inside too
+([1.1, 2.2, 0.0, 3.3], Restructure(Trio, ..., 4))
+
+julia> tri
+Trio(
+  Dense(2 => 1),                        # 3 parameters
+  Dense(1 => 1; bias=false),            # 1 parameters
+  Dropout(0.4),
+)                   # Total: 3 arrays, 4 parameters, 224 bytes.
+```
+
 """
 macro layer(exs...)
   out = quote end

@@ -40,10 +65,10 @@ macro layer(exs...)
   end
 
   # This function exists only for depwarns when you use @functor directly
-  push!(out.args, :(Flux._check_new_macro(::$(esc(type))) = nothing)) # scope is weird ?? can't use $ on func name?
+  push!(out.args, :(Flux._check_new_macro(::$(esc(type))) = nothing))
 
-  i = findfirst(ex -> Meta.isexpr(ex, :(=)) && ex.args[1] == :functor, rest)
-  if isnothing(i)
+  i = findfirst(ex -> Meta.isexpr(ex, :(=)) && ex.args[1] == :children, rest)
+  if isnothing(i)  # then default like @functor Layer
     push!(out.args, _macro_functor(esc(type)))
   else
     push!(out.args, _macro_functor(esc(type), rest[i].args[2]))

@@ -52,13 +77,14 @@ macro layer(exs...)
     j == i && continue
     ex = rest[j]
     Meta.isexpr(ex, :(=)) || error("expected keyword = fields")
-    if ex.args[1] == :trainable
-      push!(out.args, _macro_trainable(type, trainable, ex.args[2])) # pass the function "trainable" not the symbol
+
+    name = if ex.args[1] == :trainable
+      :(Optimisers.trainable)
     else
-      error()
-      # @warn "defining a method for $(ex.args[1]) in your scope" # ??
-      # push!(out.args, _macro_trainable(type, esc(ex.args[1]), ex.args[2]))
+      @warn "trying to define a method for `$(ex.args[1])` in your scope... this is experimental" maxlog=1
+      esc(ex.args[1])
     end
+    push!(out.args, _macro_trainable(esc(type), name, ex.args[2]))
   end
 
   out

@@ -72,17 +98,16 @@ function _check_new_macro(x::T) where T
 end
 _check_new_macro(::Tuple) = nothing  # defined by Functors.jl, not by users
 _check_new_macro(::NamedTuple) = nothing
-_check_new_macro(::Transpose) = nothing
-_check_new_macro(::Adjoint) = nothing
+_check_new_macro(::AbstractArray) = nothing
 _check_new_macro(::Ref) = nothing
 
 # @layer's code for Functors & Adapt
 # Unlike @functor, _default_functor doesn't need to eval anything
 
 function _macro_functor(type)
   quote
-    Functors.functor(::Type{T}, x) where {T<:$type} = _default_functor(T, x)
-    Adapt.adapt_structure(to, layer::$type) = fmap(adapt(to), layer)
+    Functors.functor(::Type{T}, x) where {T<:$type} = $_default_functor(T, x)
+    Adapt.adapt_structure(to, layer::$type) = $fmap($adapt(to), layer)
   end
 end
 

@@ -94,12 +119,13 @@ function _default_functor(::Type{T}, x) where {T}
   if @generated
     F = fieldnames(T)
     args = map(sy -> :(getfield(x, $(QuoteNode(sy)))), F)
-    C = Base.typename(T).name  # constructor
+    C = Base.typename(T).wrapper  # constructor
     recon = VERSION > v"1.9-" ? :(Splat($C)) : :(Base.splat($C))
     :((NamedTuple{$F}(($(args...),)), $recon))
   else
     # Getting this parameterless type takes about 2μs, every time:
-    namedtuple(x), Base.splat(Base.typename(T).wrapper)
+    spl = VERSION > v"1.9-" ? Splat : Base.splat
+    namedtuple(x), spl(Base.typename(T).wrapper)
   end
 end
 

@@ -117,61 +143,12 @@ function _macro_trainable(type, fun, fields)
   quoted = map(QuoteNode, symbols)
   gets = [:(getfield(x, $f)) for f in quoted]
   quote
-    # $fun(x::$type) = NamedTuple{$names}(($(gets...),))
-    Flux.trainable(x::$type) = NamedTuple{$symbols}(($(gets...),)) # ?? scope is weird
+    $fun(x::$type) = NamedTuple{$symbols}(($(gets...),))
+    # Flux.trainable(x::$type) = NamedTuple{$symbols}(($(gets...),)) # ?? scope is weird
  end
 end
 _macro_trainable(type, fun, field::Union{Symbol,QuoteNode}) = _macro_trainable(type, fun, :(($field,))) # lets you forget a comma
 
 _noquotenode(s::Symbol) = s
 _noquotenode(q::QuoteNode) = q.value # lets you write trainable=(:x,:y) instead of (x,y)
 _noquotenode(ex) = error("expected a symbol, got $ex")
-
-
-
-
-
-
-# @big_show Chain
-# @big_show Parallel
-# @big_show SkipConnection
-# @big_show Recur
-# @big_show Maxout
-
-
-
-
-"""
-    @big_show MyContainer
-
-This macro lets you opt-in to Flux's fancy printing.
-
-When `model::MyContainer` is returned at the REPL it will be treated like `Chain`,
-and the printing routine will recursively unfold its children.
-This is triggered by adding a method to 3-arg `Base.show(io::IO, ::MIME"text/plain", l::MyContainer)`.
-
-Custom layers which do not contain other layers (more like `Dense` than like `Chain`)
-need not call this, and should simply define 2-arg `Base.show(io::IO, l::MyLayer)`.
-
-# Example
-```jldoctest
-julia> struct Trio{A,B,C}; a::A; b::B; c::C end
-
-julia> Flux.@functor Trio
-
-julia> Flux.@big_show Trio
-
-julia> tri = Trio(Dense(10=>5,tanh), Dense(5=>2), softmax)
-Trio(
-  Dense(10 => 5, tanh),  # 55 parameters
-  Dense(5 => 2),  # 12 parameters
-  NNlib.softmax,
-)                   # Total: 4 arrays, 67 parameters, 492 bytes.
-```
-
-Note that there is no automatic method for 2-arg `show`, and thus
-something like `(tri, tri)` will print all the type parameters.
-
-However, `Chain(tri, tri)` will always use Flux's recursive printing,
-even without using this macro: `Chain` is the entry point.
-"""

src/layers/normalise.jl (2 additions, 0 deletions)

@@ -178,6 +178,8 @@ end
 testmode!(m::AlphaDropout, mode=true) =
   (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
 
+Base.show(io::IO, d::AlphaDropout) = print(io, "AlphaDropout(", d.p, ")")
+
 """
     LayerNorm(size..., λ=identity; affine=true, ϵ=1fe-5)
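The added method just gives `AlphaDropout` the same compact printing as `Dropout`; for instance (illustrative):

```julia
using Flux

ad = AlphaDropout(0.4)
print(ad)   # with the method added here, this prints: AlphaDropout(0.4)
```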

src/layers/show.jl (8 additions, 24 deletions)

@@ -15,7 +15,7 @@ function _macro_big_show(ex)
     end
 
     # Don't show Chain(Tuple(...)), always splat that:
-    _show_children(x::$ex) = _flat_children(x)
+    Flux._show_children(x::$ex) = _flat_children(x)
   end
 end
 

@@ -56,12 +56,10 @@ _show_leaflike(::Tuple{Vararg{<:AbstractArray}}) = true # e.g. parameters of LS
 # _show_leaflike(::Scale) = true # appears inside LayerNorm
 _show_leaflike(::AbstractArray{<:Number}) = true # e.g. transposed arrays
 
-_show_children(x) = trainable(x) # except for layers which hide their Tuple:
-# _show_children(c::Chain) = c.layers
-# _show_children(m::Maxout) = m.layers
-# _show_children(p::Parallel) = (p.connection, p.layers...)
-# _show_children(f::PairwiseFusion) = (f.connection, f.layers...)
-
+_show_children(x) = trainable(x)
+# This used to have methods for Chain, Maxout, Parallel, PairwiseFusion. Now @layer instead
+# writes a method to use this function. It flattens the Tuple within Chain etc.
+# (Some still special-cased above, for printing of layer names when NamedTuple.)
 function _flat_children(x)
   alpha = map(f -> getfield(x, f), fieldnames(typeof(x)))
   beta = map(y -> y isa Union{Tuple, NamedTuple} ? y : (y,), alpha)

@@ -79,25 +77,11 @@ function _macro_layer_show(ex)
       show(io, x)
     end
   end
-
-  # Exit from _big_show recursion, do we need this and _show_leaflike?
-  _big_show(io::IO, obj::$ex, indent::Int=0, name=nothing) = _layer_show(io, obj, indent, name)
-  # Since this isn't a container, do not recurse into its children, if any:
-  _show_leaflike(::$ex) = true
+
+  # Exit from _big_show recursion:
+  Flux._big_show(io::IO, obj::$ex, indent::Int=0, name=nothing) = _layer_show(io, obj, indent, name)
   end
 end
-# for T in [
-#   :Conv, :ConvTranspose, :CrossCor, :Dense, :Scale, :Bilinear, :Embedding,
-#   :BatchNorm, :LayerNorm, :InstanceNorm, :GroupNorm,
-# ]
-# @eval function Base.show(io::IO, m::MIME"text/plain", x::$T)
-#   if !get(io, :compact, false)
-#     _layer_show(io, x)
-#   else
-#     show(io, x)
-#   end
-# end
-# end
 
 function _layer_show(io::IO, layer, indent::Int=0, name=nothing)
   _str = isnothing(name) ? "" : "$name = "
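For orientation, the `show` support that `_macro_layer_show` generates for a plain (non-container) layer amounts roughly to the commented methods in this sketch; `MyDense` is a made-up type and the expansion is paraphrased, not literal:

```julia
using Flux

struct MyDense   # hypothetical non-container layer, for illustration
    weight
end

Flux.@layer MyDense   # one macro call; its show-related part is roughly:

#   function Base.show(io::IO, m::MIME"text/plain", x::MyDense)
#       if !get(io, :compact, false)
#           Flux._layer_show(io, x)
#       else
#           show(io, x)
#       end
#   end
#   # and, after this commit, an exit from the recursive `_big_show` printing:
#   Flux._big_show(io::IO, obj::MyDense, indent::Int=0, name=nothing) =
#       Flux._layer_show(io, obj, indent, name)

display(MyDense([1.0 2.0; 3.0 4.0]))   # shown with a trailing "# 4 parameters" summary
```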

test/layers/macro.jl (new file, 35 additions)

@@ -0,0 +1,35 @@
+using Flux, Functors, Optimisers
+
+module MacroTest
+  using Flux: @layer
+
+  struct Duo{T,S}; x::T; y::S; end
+  @layer :expand Duo
+
+  struct Trio; a; b; c end
+  @layer Trio trainable=(a,b) test=(c)  # should be (c,) but it lets you forget
+
+  # struct TwoThirds; a; b; c; end
+  # Flux.@layer :expand TwoThirds children=(a,c) trainable=(a)  # should be (a,) but it lets you forget
+
+end
+
+@testset "@layer macro" begin
+  @test !isdefined(MacroTest, :Flux)  # That's why the module, to check scope
+
+  m2 = MacroTest.Duo(Dense(2=>2), Chain(Flux.Scale(2), Dropout(0.2)))
+
+  @test Functors.children(m2) isa NamedTuple{(:x, :y)}
+  @test length(Optimisers.destructure(m2)[1]) == 10
+
+  m3 = MacroTest.Trio([1.0], [2.0], [3.0])
+
+  @test Functors.children(m3) isa NamedTuple{(:a, :b, :c)}
+  @test fmap(zero, m3) isa MacroTest.Trio
+
+  @test Optimisers.trainable(m3) isa NamedTuple{(:a, :b)}
+  @test Optimisers.destructure(m3)[1] == [1, 2]
+
+  @test MacroTest.test(m3) == (c = [3.0],)
+end
+

test/runtests.jl (1 addition, 0 deletions)

@@ -38,6 +38,7 @@ Random.seed!(0)
   include("layers/conv.jl")
   include("layers/upsample.jl")
   include("layers/show.jl")
+  include("layers/macro.jl")
 end
 
 @testset "outputsize" begin

test/utils.jl (1 addition, 1 deletion)

@@ -723,7 +723,7 @@ end
     a::A
     b::A
   end
-  Flux.@functor Model
+  Flux.@layer Model
   (m::Model)(x) = m.a(x) .+ m.b(x)
 
   d = Dense(1, 1)
