 function update!(tree, model, grad)
   # First walk is to accumulate the gradient. This recursion visits every copy of
   # shared leaves, but stops when branches are absent from the gradient:
-  dict = IdDict{Leaf, Any}()
-  grads!(dict, tree, model, grad)
-  # Second walk is to update the model, using same fmap walk as setup, thus each Leaf exactly once:
+  gdict = IdDict{Leaf, Any}()
+  grads!(gdict, tree, model, grad)
+  # Second walk is to update the model, using same fmap walk as setup:
+  xdict = IdDict{Leaf, Any}()  # (this exists to allow for shared ℓ without shared x)
   newmodel = fmap(model, tree; exclude = isnumeric) do x, ℓ
     ℓ isa Leaf || error("this state does not match the model, expected a Leaf here")
     ℓ.frozen && return x
-    haskey(dict, ℓ) || return x
-    s′, x̄′ = apply!(ℓ.rule, ℓ.state, x, dict[ℓ])
+    haskey(gdict, ℓ) || return x  # no gradient seen, nothing to do
+    if haskey(xdict, ℓ)
+      # This means that shared ℓ encodes sharing not noted in x. Won't happen with setup above, no API yet.
+      x′ = xdict[ℓ]  # ... and is why xdict exists.
+      size(x′) == size(x) || error("the same Leaf belongs to arrays of size $(size(x)) and $(size(x′))")
+      return x′
+    end
+    s′, x̄′ = apply!(ℓ.rule, ℓ.state, x, gdict[ℓ])
     ℓ.state = s′  # to get state out of here, rely on mutability of Leaf
-    subtract!(x, x̄′)
+    xdict[ℓ] = subtract!(x, x̄′)
   end
   tree, newmodel  # note that tree is guaranteed to be updated
 end
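For context, a minimal usage sketch of the shared-parameter case this change handles. It assumes only Optimisers.jl's public `Descent`, `setup` and `update!`; the model and gradient below are invented for illustration. Because the same array appears in two branches, `setup` creates a single shared `Leaf` for it, the first walk accumulates both gradient contributions onto that one entry, and the second walk applies the rule once and writes the same updated array back to both places.

using Optimisers

w = rand(Float32, 3, 3)                       # one array, tied into two branches
model = (enc = w, dec = w, bias = zeros(Float32, 3))

tree = Optimisers.setup(Descent(0.1f0), model)
tree.enc === tree.dec                         # true: one shared Leaf for the tied array

# Hand-written gradient matching the model's structure; both tied copies carry
# a contribution, which the first walk sums onto the single shared Leaf:
grad = (enc = ones(Float32, 3, 3), dec = ones(Float32, 3, 3), bias = nothing)

tree, model = Optimisers.update!(tree, model, grad)
model.enc === model.dec                       # still one array after the update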