Skip to content

Commit dc6f286

Browse files
authored
Merge pull request #1871 from mkschleg/rnn_benchmarks
Recurrent benchmarks
2 parents a851436 + 9d1eb8c commit dc6f286

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

perf/bench_utils.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
using BenchmarkTools
22
using Flux
33
using CUDA
4-
using Zygote: pullback
4+
using Zygote: pullback, ignore
55

66

77
# Forward pass: evaluate model `m` on input `x`.
fw(m, x) = m(x)

# Backward pass: apply a pullback `back` with a unit Float32 seed.
bw(back) = back(1f0)

# Forward + backward: gradient of `sum(m(x))` w.r.t. the params `ps`.
# Routed through `fw` so RNN-specific `fw` specializations are picked up.
fwbw(m, ps, x) = gradient(() -> sum(fw(m, x)), ps)

# Forward pass via `pullback`, returning `(value, back)`; routed through
# `fw` for the same reason as `fwbw`.
pb(m, ps, x) = pullback(() -> sum(fw(m, x)), ps)
11+
1112
function run_benchmark(model, x; cuda=true)
1213

1314
if cuda
@@ -16,7 +17,7 @@ function run_benchmark(model, x; cuda=true)
1617
end
1718

1819
ps = Flux.params(model)
19-
y, back = pullback(() -> sum(model(x)), ps)
20+
y, back = pb(model, ps, x)
2021

2122

2223
if cuda

perf/recurrent.jl

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
2+
3+
# Thin wrapper around a recurrent layer so the benchmark entry points
# (`fw`, `fwbw`, `pb`) can dispatch on it and reset hidden state first.
struct RNNWrapper{T}
    rnn::T
end
Flux.@functor RNNWrapper
7+
8+
# Specializations of `fw` for RNNWrapper: the hidden state must be reset
# before every timed forward pass so runs are comparable.

# Sequence given as a vector of per-timestep arrays: feed one step at a time.
function fw(r::RNNWrapper, X::Vector{<:AbstractArray})
    Flux.reset!(r.rnn)
    return [r.rnn(step) for step in X]
end

# Single dense input: one call through the wrapped layer.
function fw(r::RNNWrapper, X)
    Flux.reset!(r.rnn)
    return r.rnn(X)
end
18+
19+
# Forward + backward for a wrapped RNN over a per-timestep sequence:
# gradient of the summed outputs w.r.t. `ps`.
function fwbw(r::RNNWrapper, ps, X::Vector{<:AbstractArray})
    return gradient(ps) do
        outputs = fw(r, X)
        return sum(sum(outputs))
    end
end

# Same loss as `fwbw`, but via `pullback` so the backward pass can be
# timed separately from the forward pass.
function pb(r::RNNWrapper, ps, X::Vector{<:AbstractArray})
    return pullback(ps) do
        outputs = fw(r, X)
        return sum(sum(outputs))
    end
end
28+
29+
"""
    rnn_benchmark_sweep(data_creator::Function, rnn_type)

Benchmark `rnn_type` on CPU and CUDA across a sweep of layer sizes `n`
and sequence lengths `ts`. `data_creator(n, ts)` must return the input
`x` and a short label describing the input layout (e.g. "Vec", "Block").

CUDA runs that exhaust GPU memory are reported with a warning and
skipped; any other exception is rethrown.
"""
function rnn_benchmark_sweep(data_creator::Function, rnn_type)
    for n in [2, 20, 200, 1000], ts in [1, 4, 16, 64]
        x, x_n = data_creator(n, ts)
        model = RNNWrapper(rnn_type(n, n))

        println("$rnn_type $x_n CPU n=$n, ts=$ts")
        run_benchmark(model, x, cuda=false)

        println("$rnn_type $x_n CUDA n=$n, ts=$ts")
        try
            run_benchmark(model, x, cuda=true)
        catch ex
            @show typeof(ex)
            if ex isa OutOfGPUMemoryError
                @warn "Not enough GPU memory to run test"
            else
                # rethrow() (no argument) preserves the original backtrace,
                # unlike rethrow(ex) which re-raises from here.
                rethrow()
            end
        end
    end
end
50+
51+
# Sweep each recurrent layer type with input as a vector of per-timestep
# matrices ("Vec" layout).
for rnn_type in [Flux.RNN, Flux.GRU, Flux.LSTM]
    rnn_benchmark_sweep(rnn_type) do n, ts
        [randn(Float32, n, n) for _ in 1:ts], "Vec"
    end
end

# Sweep again with a single dense 3-D array ("Block" layout — presumably
# features x batch x time; confirm against Flux's recurrent layer docs).
for rnn_type in [Flux.RNN, Flux.GRU, Flux.LSTM]
    rnn_benchmark_sweep(rnn_type) do n, ts
        randn(Float32, n, n, ts), "Block"
    end
end
62+

perf/runbenchmarks.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ include("conv.jl")
1111

1212
# Reconstructed final version of the script tail: run each benchmark suite.
@info "Benchmark VGG"
include("vgg.jl")

@info "Benchmark Recurrent"
include("recurrent.jl")

0 commit comments

Comments
 (0)