From afb4099e6d7ad1f58ecbf45f5ebfa593c7ef5009 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Wed, 13 May 2020 15:37:45 +0200
Subject: [PATCH 01/12] Added upsample layer

Added BilinearUpsample2d layer
---
 src/Flux.jl            |   1 +
 src/layers/upsample.jl | 162 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)
 create mode 100644 src/layers/upsample.jl

diff --git a/src/Flux.jl b/src/Flux.jl
index 90dcb63045..a45315c3c5 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -36,6 +36,7 @@ include("layers/basic.jl")
 include("layers/conv.jl")
 include("layers/recurrent.jl")
 include("layers/normalise.jl")
+include("layers/upsample.jl")
 
 include("data/Data.jl")
 
diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
new file mode 100644
index 0000000000..874afd86dc
--- /dev/null
+++ b/src/layers/upsample.jl
@@ -0,0 +1,162 @@
+"""
+    BilinearUpsample2d(factors::Tuple{Integer,Integer})
+
+Create an upsampling layer that uses bilinear interpolation to upsample the 1st and 2nd dimensions of
+a 4-dimensional input array. The size of the output array will be equal to
+`(factors[1]*S1, factors[2]*S2, S3, S4)`, where `S1,S2,S3,S4 = size(input_array)`.
+
+# Examples
+```jldoctest; setup = :(using Flux: BilinearUpsample2d; using Random; Random.seed!(0))
+julia> b = Flux.BilinearUpsample2d((2, 2))
+BilinearUpsample2d( 2, 2 )
+
+julia> b(rand(2, 2, 1, 1))
+4×4×1×1 Array{Float64,4}:
+[:, :, 1, 1] =
+ 0.823648  0.658877  0.329336  0.164566
+ 0.845325  0.675933  0.337149  0.167757
+ 0.888679  0.710044  0.352773  0.174138
+ 0.910357  0.7271    0.360586  0.177329
+```
+"""
+struct BilinearUpsample2d{}
+    factors::Tuple{T,T} where T<:Integer
+    BilinearUpsample2d(factors::Tuple{R,R}) where R<:Integer = new(factors)
+    BilinearUpsample2d(factors::F) where F<:Integer = new((factors, factors))
+end
+
+@functor BilinearUpsample2d
+
+function (c::BilinearUpsample2d)(x::AbstractArray)
+    bilinear_upsample2d(x, c.factors)
+end
+
+function Base.show(io::IO, l::BilinearUpsample2d)
+    print(io, "BilinearUpsample2d( $(l.factors[1]), $(l.factors[2]) )")
+end
+
+"""
+    `construct_xq(n::T, m::T) where T<:Integer`
+
+Creates the interpolation points for resampling; this is the same grid as used by `imresize` in Images.jl.
+"""
+@nograd function construct_xq(n::T, m::T) where T<:Integer
+    typed1 = one(n)
+    typed2 = 2typed1
+    step = n // m
+    offset = (n + typed1)//typed2 - step//typed2 - step*(m//typed2 - typed1)
+    x = range(offset, step=step, length=m)
+    xq = clamp.(x, typed1//typed1, n//typed1)
+    return xq
+end
+
+"""
+    `get_inds_and_ws(xq, dim, n_dims)`
+
+Creates interpolation lower and upper indices, and broadcastable weights
+"""
+@nograd function get_inds_and_ws(xq, dim, n_dims)
+    n = length(xq)
+
+    ilow = floor.(Int, xq)
+    ihigh = ceil.(Int, xq)
+
+    wdiff = xq .- ilow
+
+    newsizetup = tuple((i == dim ? n : 1 for i in 1:n_dims)...)
+    wdiff = reshape(wdiff, newsizetup)
+
+    return ilow, ihigh, wdiff
+end
+
+"""
+    adjoint_of_idx(idx::Vector{T}) where T<:Integer
+
+# Arguments
+- `idx::Vector{T<:Integer}`: a vector of indices from which you want the adjoint.
+
+# Outputs
+- `idx_adjoint`: the index vector that inverts the operation `x[idx]`.
+
+# Explanation
+Determines the adjoint of the vector of indices `idx`, based on the following assumptions:
+* `idx[1] == 1`
+* `all(d in [0,1] for d in diff(idx))`
+
+The adjoint of `idx` can be seen as an inverse operation:
+```jldoctest
+x[idx][idx_adjoint] == x
+```
+
+The above holds as long as `idx` contains every index in `x`.
+"""
+@nograd function adjoint_of_idx(idx::Vector{T}) where T<:Integer
+    d = trues(size(idx))
+    d[2:end] .= diff(idx, dims=1)
+    idx_adjoint = findall(d)
+    return idx_adjoint
+end
+
+@nograd function get_newsize(oldsize, k_upsample)
+    newsize = (i <= length(k_upsample) ? s*k_upsample[i] : s for (i,s) in enumerate(oldsize))
+    return tuple(newsize...)
+end
+
+"""
+    `bilinear_upsample2d(img::AbstractArray{T,4}, k_upsample::NTuple{2,<:Real}) where T`
+
+# Arguments
+- `img::AbstractArray`: the 4-dimensional array to be upsampled.
+- `k_upsample::NTuple{2}`: a tuple containing the factors with which the first two dimensions of `img` are upsampled.
+
+# Outputs
+- `imgupsampled::AbstractArray`: the upsampled version of `img`. The size of `imgupsampled` is
+equal to `(k_upsample[1]*S1, k_upsample[2]*S2, S3, S4)`, where `S1,S2,S3,S4 = size(img)`.
+
+# Explanation
+Upsamples the first two dimensions of the 4-dimensional array `img` by the two upsample factors stored in `k_upsample`,
+using bilinear interpolation. The interpolation grid is identical to the one used by `imresize` from `Images.jl`.
+"""
+function bilinear_upsample2d(img::AbstractArray{T,4}, k_upsample::NTuple{2,<:Real}) where T
+
+    ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2 = setup_upsample(size(img), eltype(img), k_upsample)
+
+    @inbounds imgupsampled = bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)
+
+    return imgupsampled
+end
+
+"""
+    `bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)`
+
+Does the heavy lifting part of the bilinear upsampling operation
+"""
+function bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)
+    imgupsampled = @view(img[ilow1,ilow2,:,:]) .* (1 .- wdiff1) .+ @view(img[ihigh1,ilow2,:,:]) .* wdiff1
+    imgupsampled = imgupsampled .* (1 .- wdiff2) .+ @view(imgupsampled[:,ihigh2_r,:,:]) .* wdiff2
+end
+
+"""
+    `setup_upsample(imgsize::NTuple{4,<:Integer}, imgdtype, k_upsample::NTuple{2,<:Real})`
+
+Creates arrays of interpolation indices and weights for the bilinear_upsample2d operation.
+"""
+@nograd function setup_upsample(imgsize::NTuple{4,<:Integer}, imgdtype, k_upsample::NTuple{2,<:Real})
+    n_dims = 4
+    newsize = get_newsize(imgsize, k_upsample)
+
+    # Create interpolation grids
+    xq1 = construct_xq(imgsize[1], newsize[1])
+    xq2 = construct_xq(imgsize[2], newsize[2])
+
+    # Get linear interpolation lower- and upper index, and weights
+    ilow1, ihigh1, wdiff1 = get_inds_and_ws(xq1, 1, n_dims)
+    ilow2, ihigh2, wdiff2 = get_inds_and_ws(xq2, 2, n_dims)
+
+    # Adjust the upper interpolation indices of the second dimension
+    ihigh2_r = adjoint_of_idx(ilow2)[ihigh2]
+
+    wdiff1 = imgdtype.(wdiff1)
+    wdiff2 = imgdtype.(wdiff2)
+
+    return ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2
+
+end
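For orientation, a minimal usage sketch of the layer this patch introduces; it is not part of the series, and assumes only the definitions above (`BilinearUpsample2d` is not exported, so it is accessed via the `Flux.` prefix):

```julia
using Flux

# A small decoder stage: convolve, then double the spatial resolution.
m = Chain(
    Conv((3, 3), 16 => 8, relu, pad = 1),
    Flux.BilinearUpsample2d((2, 2)),
    Conv((3, 3), 8 => 4, relu, pad = 1),
)

x = rand(Float32, 16, 16, 16, 1)  # width × height × channels × batch
size(m(x))                        # (32, 32, 4, 1)
```

Note that the layer has no trainable parameters, which is why the struct only stores `factors`.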
From 617227980075fb1b390864244be5b79007601e47 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Wed, 13 May 2020 15:37:55 +0200
Subject: [PATCH 02/12] Added tests
---
 test/layers/upsample.jl | 33 +++++++++++++++++++++++++++++++++
 test/runtests.jl        |  1 +
 2 files changed, 34 insertions(+)
 create mode 100644 test/layers/upsample.jl

diff --git a/test/layers/upsample.jl b/test/layers/upsample.jl
new file mode 100644
index 0000000000..c407067311
--- /dev/null
+++ b/test/layers/upsample.jl
@@ -0,0 +1,33 @@
+using Flux: BilinearUpsample2d
+using Test
+
+@testset "BilinearUpsample2d" begin
+  @test size(BilinearUpsample2d((2, 2))(rand(2, 2, 1, 1))) == (4, 4, 1, 1)
+  @test size(BilinearUpsample2d((3, 3))(rand(2, 2, 1, 1))) == (6, 6, 1, 1)
+  @test size(BilinearUpsample2d((2, 3))(rand(2, 2, 10, 10))) == (4, 6, 10, 10)
+  @test size(BilinearUpsample2d((3, 2))(rand(2, 2, 10, 10))) == (6, 4, 10, 10)
+
+  @test_throws MethodError BilinearUpsample2d((2, 2))(rand(2, 2))
+
+  @test BilinearUpsample2d((3, 2))([1. 2.; 3. 4.][:,:,:,:]) ≈
+    [1//1  5//4   7//4   2//1;
+     1//1  5//4   7//4   2//1;
+     5//3 23//12 29//12  8//3;
+     7//3 31//12 37//12 10//3;
+     3//1 13//4  15//4   4//1;
+     3//1 13//4  15//4   4//1][:,:,:,:]
+
+  testimg1 = [1. 0.; 0 0][:,:,:,:]
+  factors1 = (3, 2)
+  f1(x) = sum(BilinearUpsample2d(factors1)(x))
+  df1(x) = Flux.gradient(f1, x)[1]
+  @test df1(testimg1) ≈ fill(eltype(testimg1).(prod(factors)), size(testimg1))
+
+  testimg2 = [1. 0.; 0 0][:,:,:,:]
+  factors2 = (3, 2)
+  f2(x) = BilinearUpsample2d(factors2)(x)[3,2]
+  df2(x) = Flux.gradient(f2, x)[1]
+  @test df2(testimg2) ≈
+    [1//2 1//6
+    1//4 1//12][:,:,:,:]
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c2ea0715cf..9f269a4661 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -30,6 +30,7 @@ Random.seed!(0)
   include("layers/normalisation.jl")
   include("layers/stateless.jl")
   include("layers/conv.jl")
+  include("layers/upsample.jl")
 end
 
 @testset "CUDA" begin
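The `df1` test above rests on a conservation property of this interpolation grid: every input pixel contributes a total weight of `prod(factors)` to the output, so the gradient of `sum ∘ upsample` is constant. A small sketch of that check, assuming the layer from PATCH 01 (illustrative values, not part of the series):

```julia
using Flux
using Flux: BilinearUpsample2d

up = BilinearUpsample2d((3, 2))
x = rand(4, 5, 1, 1)

# Each input pixel's interpolation weights sum to prod((3, 2)) == 6,
# so ∂(sum(up(x)))/∂x should be 6 everywhere, including the clamped edges.
g = Flux.gradient(x -> sum(up(x)), x)[1]
all(g .≈ 6)  # expected to hold for this grid
```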
From a1d78a318697d15bd1d9b4c80b804e746f75ae8c Mon Sep 17 00:00:00 2001
From: Unknown
Date: Wed, 13 May 2020 17:26:39 +0200
Subject: [PATCH 03/12] Fixed test error
---
 test/layers/upsample.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/layers/upsample.jl b/test/layers/upsample.jl
index c407067311..9122cbed06 100644
--- a/test/layers/upsample.jl
+++ b/test/layers/upsample.jl
@@ -21,7 +21,7 @@ using Test
   factors1 = (3, 2)
   f1(x) = sum(BilinearUpsample2d(factors1)(x))
   df1(x) = Flux.gradient(f1, x)[1]
-  @test df1(testimg1) ≈ fill(eltype(testimg1).(prod(factors)), size(testimg1))
+  @test df1(testimg1) ≈ fill(eltype(testimg1).(prod(factors1)), size(testimg1))
 
   testimg2 = [1. 0.; 0 0][:,:,:,:]
   factors2 = (3, 2)
@@ -29,5 +29,5 @@ using Test
   df2(x) = Flux.gradient(f2, x)[1]
   @test df2(testimg2) ≈
     [1//2 1//6
-    1//4 1//12][:,:,:,:]
+     1//4 1//12][:,:,:,:]
 end
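The rational matrix in the `df2` test can be reproduced by hand. With factors `(3, 2)`, output pixel `[3, 2]` interpolates between input rows 1 and 2 with weights `2//3` and `1//3`, and between input columns 1 and 2 with weights `3//4` and `1//4`; since bilinear weights are separable, the gradient is their outer product. A sketch (weights derived from the clamped grid that `construct_xq` builds):

```julia
# Contributions of input rows 1,2 to output row 3, and of input
# columns 1,2 to output column 2, under upsample factors (3, 2).
w_row = [2//3, 1//3]
w_col = [3//4, 1//4]

w_row .* w_col'  # == [1//2 1//6; 1//4 1//12], the matrix asserted above
```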
From a7a54c6a31cbac7dd04e23015b3280a07db9924b Mon Sep 17 00:00:00 2001
From: Unknown
Date: Wed, 13 May 2020 17:53:56 +0200
Subject: [PATCH 04/12] Cast weights to CuArray if needed
---
 src/layers/upsample.jl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index 874afd86dc..4143e0bb15 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -130,6 +130,10 @@ end
 Does the heavy lifting part of the bilinear upsampling operation
 """
 function bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)
+    if typeof(img) <: CuArray
+        wdiff1 = CuArray(wdiff1)
+        wdiff2 = CuArray(wdiff2)
+    end
     imgupsampled = @view(img[ilow1,ilow2,:,:]) .* (1 .- wdiff1) .+ @view(img[ihigh1,ilow2,:,:]) .* wdiff1
     imgupsampled = imgupsampled .* (1 .- wdiff2) .+ @view(imgupsampled[:,ihigh2_r,:,:]) .* wdiff2
 end

From 9b5ca6f4e3b6cceb5af14ff2a0ca05c9c4195fd6 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Thu, 14 May 2020 09:18:30 +0200
Subject: [PATCH 05/12] Moved T to type signature, moved constructors

Also removed a faulty doctest
---
 src/layers/upsample.jl | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index 4143e0bb15..6cfb0db484 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -17,15 +17,16 @@ julia> b(rand(2, 2, 1, 1))
  0.888679  0.710044  0.352773  0.174138
  0.910357  0.7271    0.360586  0.177329
 ```
 """
-struct BilinearUpsample2d{}
-    factors::Tuple{T,T} where T<:Integer
-    BilinearUpsample2d(factors::Tuple{R,R}) where R<:Integer = new(factors)
-    BilinearUpsample2d(factors::F) where F<:Integer = new((factors, factors))
+
+struct BilinearUpsample2d{T<:Integer}
+    factors::Tuple{T,T}
 end
 
+BilinearUpsample2d(factor::F) where F<:Integer = BilinearUpsample2d((factor, factor))
+
 @functor BilinearUpsample2d
 
-function (c::BilinearUpsample2d)(x::AbstractArray)
+function (c::T where T<:BilinearUpsample2d)(x::AbstractArray)
     bilinear_upsample2d(x, c.factors)
 end
@@ -84,9 +85,12 @@ Determines the adjoint of the vector of indices `idx`, based on the following assumptions:
 * `idx[1] == 1`
 * `all(d in [0,1] for d in diff(idx))`
 
-The adjoint of `idx` can be seen as an inverse operation:
-```jldoctest
-x[idx][idx_adjoint] == x
+The adjoint of `idx` can be seen as an inverse operation such that:
+```
+x = [1, 2, 3, 4, 5]
+idx = [1, 2, 2, 3, 4, 4, 5]
+idx_adjoint = adjoint_of_idx(idx)
+@assert x[idx][idx_adjoint] == x
 ```
 
 The above holds as long as `idx` contains every index in `x`.
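After PATCH 05, a scalar factor is promoted to a tuple by the outer constructor, so both forms below build the same layer (a small illustration, not part of the patch):

```julia
using Flux

Flux.BilinearUpsample2d((2, 2))  # explicit per-dimension factors
Flux.BilinearUpsample2d(2)       # scalar shorthand, promoted to (2, 2)
```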
From 4d921c45d10af47f1e3701b795c6a0f536e743ac Mon Sep 17 00:00:00 2001
From: Unknown
Date: Fri, 15 May 2020 12:42:23 +0200
Subject: [PATCH 06/12] Added CUDA gradient test

Gradient doesn't currently work when using CuArrays
---
 test/cuda/cuda.jl | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl
index 128e5c7d8f..7172aa7051 100644
--- a/test/cuda/cuda.jl
+++ b/test/cuda/cuda.jl
@@ -50,6 +50,19 @@ x = gpu(rand(10, 10, 3, 2))
 l = c(gpu(rand(10,10,3,2)))
 @test gradient(x -> sum(c(x)), x)[1] isa CuArray
 
+# BilinearUpsample2d
+c = BilinearUpsample2d((2,2))
+x = rand(10,10,3,2)
+f = x -> sum(c(x))
+df = x -> gradient(f, x)[1]
+
+c_c = gpu(c)
+x_c = gpu(x)
+f_c = x_c -> sum(c_c(x_c))
+df_c = x -> gradient(f_c, x_c)[1]
+@test df_c(x_c) isa CuArray
+@test df(x) == cpu(df_c(x_c))
+
 end
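PATCH 08 below replaces Zygote's derived gradient with a hand-written adjoint. The defining property it must satisfy is that of the adjoint of a linear operator: `⟨U x, y⟩ == ⟨x, U'y⟩`. One way such an adjoint can be checked numerically (a CPU-only sketch, assuming the layer and the working CPU gradient from the series so far):

```julia
using Flux

up = Flux.BilinearUpsample2d((2, 2))
x = rand(3, 3, 1, 1)
y = rand(6, 6, 1, 1)

# For a linear map U, the pullback of sum(U(x) .* y) at x is exactly U'y,
# so the two inner products below should agree up to floating point.
Uty = Flux.gradient(x -> sum(up(x) .* y), x)[1]
sum(up(x) .* y) ≈ sum(x .* Uty)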
From a6d23d8405fbc7a6c15f9d71354f03183fed6f84 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Sat, 16 May 2020 16:38:13 +0200
Subject: [PATCH 08/12] Added a custom adjoint

Since Zygote isn't able to properly handle the current implementation, and
I believe moving to an iterative approach would mean a very significant
performance reduction, I have added a custom adjoint. Since the adjoint of
upsampling is a downsampling operation, I have used Flux.Conv in combination
with a downsample kernel and some manual edge-effect correction.
---
 src/layers/upsample.jl | 149 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 147 insertions(+), 2 deletions(-)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index 6cfb0db484..6b08633c9c 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -34,6 +34,10 @@ function Base.show(io::IO, l::BilinearUpsample2d)
     print(io, "BilinearUpsample2d( $(l.factors[1]), $(l.factors[2]) )")
 end
 
+@adjoint function (c::T where T<:BilinearUpsample2d)(x::AbstractArray)
+    (c::T where T<:BilinearUpsample2d)(x), c̄ -> (nothing, bilinear_upsample_adjoint(c̄, c.factors))
+end
+
 """
     `construct_xq(n::T, m::T) where T<:Integer`
 
@@ -125,7 +132,7 @@ using bilinear interpolation. The interpolation grid is identical to the one used by `imresize` from `Images.jl`.
 """
 function bilinear_upsample2d(img::AbstractArray{T,4}, k_upsample::NTuple{2,<:Real}) where T
 
-    ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2 = setup_upsample(size(img), eltype(img), k_upsample)
+    ilow1, ihigh1, wdiff1, ilow2, ihigh2, wdiff2, ihigh2_r = setup_upsample(size(img), eltype(img), k_upsample)
 
     @inbounds imgupsampled = bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)
 
     return imgupsampled
 end
@@ -160,6 +167,147 @@ Creates arrays of interpolation indices and weights for the bilinear_upsample2d operation.
     wdiff1 = imgdtype.(wdiff1)
     wdiff2 = imgdtype.(wdiff2)
 
-    return ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2
+    return ilow1, ihigh1, wdiff1, ilow2, ihigh2, wdiff2, ihigh2_r
 
 end
+
+"""
+    `get_downsamplekernel(n::T) where T<:Integer`
+
+# Arguments
+- `n<:Integer`: upsample factor for which a downsample kernel will be determined
+
+# Outputs
+- `kernel`: downsample kernel
+
+"""
+function get_downsamplekernel(n::T) where T<:Integer
+    step = 1//n
+    if n % 2 == 0
+        start = step//2
+        upward = collect(start:step:1//1)
+        kernel = [upward; reverse(upward)]
+    else
+        start = step
+        upward = collect(start:step:1//1)
+        kernel = [upward; reverse(upward[1:end-1])]
+    end
+    return kernel
+end
+
+"""
+    `bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)`
+
+# Arguments
+- `arr::AbstractArray`: array that has been upsampled using the upsample factors in `factors`
+
+# Outputs
+- `arr_ds`: downsampled version of `arr`
+
+# Explanation
+Custom adjoint for `BilinearUpsample2d`. Needed because Zygote cannot properly determine gradients
+for the current implementation of the forward pass. The adjoint of upsampling is a downsampling operation, which
+in this implementation is performed using `Flux.Conv` in combination with a downsampling kernel based on the
+upsampling factors. Because of the zero-padding during convolution, the values at the boundary are polluted by edge-effects,
+which have been corrected for manually.
+"""
+function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
+
+    if size(arr,1) == factors[1]
+        arr = sum(arr, dims=1)
+        factors = (1, factors[2])
+    end
+
+    if size(arr,2) == factors[2]
+        arr = sum(arr, dims=2)
+        factors = (factors[1], 1)
+    end
+
+    if size(arr)[1:2] == (1,1)
+        ds_arr = arr
+        return ds_arr
+    end
+
+    n_chan, n_batch = size(arr)[3:4]
+
+    kern1 = get_downsamplekernel(factors[1])
+    kern2 = get_downsamplekernel(factors[2])
+    kern = kern1 .* kern2'
+
+    kern_sizes = size(kern)
+    pads = tuple((Int.(floor(factor//2)) for factor in factors)...)
+    strides = factors
+
+    conv_ds = Conv(kern_sizes, n_chan=>n_chan, pad=pads, stride=strides)
+
+    conv_ds.weight .*= 0
+    for i in 1:n_chan
+        conv_ds.weight[:,:,i,i] .= kern
+    end
+    conv_ds.bias .*= 0
+
+    if arr isa CuArray
+        conv_ds = gpu(conv_ds)
+    end
+
+    arr_ds = conv_ds(arr)
+
+    # Still have to fix edge effects due to zero-padding of convolution,
+    # TODO: Could be circumvented by having padding that just extrapolates the value at the first/last index
+    nextras = tuple((Int.(floor(factor//2)) for factor in factors)...)
+
+    # First dimension edge-effect correction
+    if nextras[1] > 0
+        kern_extra1 = kern[1:nextras[1],:]
+        conv_extra1 = Conv(size(kern_extra1), n_chan=>n_chan, pad=(0,pads[2]), stride=(1,strides[2]))
+
+        conv_extra1.weight .*= 0
+        for i in 1:n_chan
+            conv_extra1.weight[:,:,i,i] .= kern_extra1
+        end
+        conv_extra1.bias .*= 0
+
+        if arr isa CuArray
+            conv_extra1 = gpu(conv_extra1)
+        end
+
+        arr_ds[[1],:,:,:] .+= conv_extra1(arr[1:nextras[1],:,:,:])
+        conv_extra1.weight .= conv_extra1.weight[end:-1:1,:,:,:]
+        arr_ds[[end],:,:,:] .+= conv_extra1(arr[end-nextras[1]+1:end,:,:,:])
+    end
+
+    # Second dimension edge-effect correction
+    if nextras[2] > 0
+        kern_extra2 = kern[:,1:nextras[2]]
+        conv_extra2 = Conv(size(kern_extra2), n_chan=>n_chan, pad=(pads[1],0), stride=(strides[1],1))
+
+        conv_extra2.weight .*= 0
+        for i in 1:n_chan
+            conv_extra2.weight[:,:,i,i] .= kern_extra2
+        end
+        conv_extra2.bias .*= 0
+
+        if arr isa CuArray
+            conv_extra2 = gpu(conv_extra2)
+        end
+
+        arr_ds[:,[1],:,:] .+= conv_extra2(arr[:,1:nextras[2],:,:])
+        conv_extra2.weight .= conv_extra2.weight[:,end:-1:1,:,:]
+        arr_ds[:,[end],:,:] .+= conv_extra2(arr[:,end-nextras[2]+1:end,:,:])
+    end
+
+    # Finally fix four corners if needed
+    kern = eltype(arr).(kern)
+    if arr isa CuArray
+        kern = gpu(kern)
+    end
+    n1, n2 = nextras
+    if (n1 > 0) & (n2 > 0)
+        arr_ds[1,1,:,:] .+= sum(kern[1:n1,1:n2] .* arr[1:n1,1:n2,:,:], dims=(1,2))[1,1,:,:]
+        arr_ds[1,end,:,:] .+= sum(kern[1:n1,end-n2+1:end] .* arr[1:n1,end-n2+1:end,:,:], dims=(1,2))[1,1,:,:]
+        arr_ds[end,end,:,:] .+= sum(kern[end-n1+1:end,end-n2+1:end] .* arr[end-n1+1:end,end-n2+1:end,:,:], dims=(1,2))[1,1,:,:]
+        arr_ds[end,1,:,:] .+= sum(kern[end-n1+1:end,1:n2] .* arr[end-n1+1:end,1:n2,:,:], dims=(1,2))[1,1,:,:]
+    end
+
+    return arr_ds
+end
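The downsample kernel built in PATCH 08 is a tent (triangle) filter whose taps sum to `n`, which is what makes the strided convolution act as the adjoint of bilinear interpolation away from the edges. For illustration (values follow from the definition above, assuming `get_downsamplekernel` is in scope):

```julia
get_downsamplekernel(2)  # [1//4, 3//4, 3//4, 1//4]
get_downsamplekernel(3)  # [1//3, 2//3, 1//1, 2//3, 1//3]

# The 2-D kernel used by the adjoint is the outer product of the 1-D kernels:
kern = get_downsamplekernel(3) .* get_downsamplekernel(2)'
sum(kern)  # == 6 == prod((3, 2)), matching the constant-gradient tests earlier
```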
From 66dd999b27e812044efe615e8c75d27a4ea68526 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Thu, 21 May 2020 13:13:50 +0200
Subject: [PATCH 09/12] Added some @view macros

Effort to reduce memory footprint slightly
---
 src/layers/upsample.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index 6b08633c9c..bcc4a10008 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -273,9 +273,9 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
             conv_extra1 = gpu(conv_extra1)
         end
 
-        arr_ds[[1],:,:,:] .+= conv_extra1(arr[1:nextras[1],:,:,:])
-        conv_extra1.weight .= conv_extra1.weight[end:-1:1,:,:,:]
-        arr_ds[[end],:,:,:] .+= conv_extra1(arr[end-nextras[1]+1:end,:,:,:])
+        arr_ds[[1],:,:,:] .+= conv_extra1(@view(arr[1:nextras[1],:,:,:]))
+        conv_extra1.weight .= @view(conv_extra1.weight[end:-1:1,:,:,:])
+        arr_ds[[end],:,:,:] .+= conv_extra1(@view(arr[end-nextras[1]+1:end,:,:,:]))
     end
 
     # Second dimension edge-effect correction
@@ -293,9 +293,9 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
             conv_extra2 = gpu(conv_extra2)
         end
 
-        arr_ds[:,[1],:,:] .+= conv_extra2(arr[:,1:nextras[2],:,:])
-        conv_extra2.weight .= conv_extra2.weight[:,end:-1:1,:,:]
-        arr_ds[:,[end],:,:] .+= conv_extra2(arr[:,end-nextras[2]+1:end,:,:])
+        arr_ds[:,[1],:,:] .+= conv_extra2(@view(arr[:,1:nextras[2],:,:]))
+        conv_extra2.weight .= @view(conv_extra2.weight[:,end:-1:1,:,:])
+        arr_ds[:,[end],:,:] .+= conv_extra2(@view(arr[:,end-nextras[2]+1:end,:,:]))
     end
 
     # Finally fix four corners if needed

From a265d217787eb16b3a0dc8bfe557367ed53a5ec8 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Thu, 21 May 2020 13:15:06 +0200
Subject: [PATCH 10/12] More @view macro use
---
 src/layers/upsample.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index bcc4a10008..05a64418c1 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -305,10 +305,10 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
     end
     n1, n2 = nextras
     if (n1 > 0) & (n2 > 0)
-        arr_ds[1,1,:,:] .+= sum(kern[1:n1,1:n2] .* arr[1:n1,1:n2,:,:], dims=(1,2))[1,1,:,:]
-        arr_ds[1,end,:,:] .+= sum(kern[1:n1,end-n2+1:end] .* arr[1:n1,end-n2+1:end,:,:], dims=(1,2))[1,1,:,:]
-        arr_ds[end,end,:,:] .+= sum(kern[end-n1+1:end,end-n2+1:end] .* arr[end-n1+1:end,end-n2+1:end,:,:], dims=(1,2))[1,1,:,:]
-        arr_ds[end,1,:,:] .+= sum(kern[end-n1+1:end,1:n2] .* arr[end-n1+1:end,1:n2,:,:], dims=(1,2))[1,1,:,:]
+        arr_ds[1,1,:,:] .+= sum(kern[1:n1,1:n2] .* @view(arr[1:n1,1:n2,:,:]), dims=(1,2))[1,1,:,:]
+        arr_ds[1,end,:,:] .+= sum(kern[1:n1,end-n2+1:end] .* @view(arr[1:n1,end-n2+1:end,:,:]), dims=(1,2))[1,1,:,:]
+        arr_ds[end,end,:,:] .+= sum(kern[end-n1+1:end,end-n2+1:end] .* @view(arr[end-n1+1:end,end-n2+1:end,:,:]), dims=(1,2))[1,1,:,:]
+        arr_ds[end,1,:,:] .+= sum(kern[end-n1+1:end,1:n2] .* @view(arr[end-n1+1:end,1:n2,:,:]), dims=(1,2))[1,1,:,:]
     end
 
     return arr_ds
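The final two patches target type stability. One way the instability they fix can be surfaced is Julia's reflection macro (a diagnostic sketch, not part of the series):

```julia
using Flux
using InteractiveUtils: @code_warntype

up = Flux.BilinearUpsample2d((2, 2))
x = rand(Float32, 8, 8, 1, 1)

# Before PATCH 11, the reshape target size depended on a runtime n_dims,
# which tends to show up as non-concrete (red) types in this report.
@code_warntype up(x)
```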
From b67d4e82e67d15e5a7668cbdd9ca0bb3bdaf9c22 Mon Sep 17 00:00:00 2001
From: Unknown
Date: Thu, 21 May 2020 13:35:57 +0200
Subject: [PATCH 11/12] Fixed type-instability

Type instability occurred due to a variable number of dimensions; fixed by
hard-coding everything to 4 dimensions
---
 src/layers/upsample.jl | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index 05a64418c1..e607882761 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -58,15 +58,22 @@ end
 Creates interpolation lower and upper indices, and broadcastable weights
 """
-@nograd function get_inds_and_ws(xq, dim, n_dims)
+@nograd function get_inds_and_ws(xq, dim)
     n = length(xq)
 
     ilow = floor.(Int, xq)
     ihigh = ceil.(Int, xq)
 
-    wdiff = xq .- ilow
+    wdiff = xq[:,:,:,:] .- ilow[:,:,:,:]
+
+    if dim == 1
+        newsizetup = (n, 1, 1, 1)
+    elseif dim == 2
+        newsizetup = (1, n, 1, 1)
+    else
+        error("Unreachable reached")
+    end
 
-    newsizetup = tuple((i == dim ? n : 1 for i in 1:n_dims)...)
     wdiff = reshape(wdiff, newsizetup)
 
     return ilow, ihigh, wdiff
@@ -125,7 +132,7 @@ using bilinear interpolation. The interpolation grid is identical to the one used by `imresize` from `Images.jl`.
 """
 function bilinear_upsample2d(img::AbstractArray{T,4}, k_upsample::NTuple{2,<:Real}) where T
 
-    ilow1, ihigh1, wdiff1, ilow2, ihigh2, wdiff2, ihigh2_r = setup_upsample(size(img), eltype(img), k_upsample)
+    ilow1, ihigh1, wdiff1, ilow2, ihigh2, wdiff2, ihigh2_r = setup_upsample(img, k_upsample)
 
     @inbounds imgupsampled = bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)
@@ -138,10 +145,6 @@ end
 Does the heavy lifting part of the bilinear upsampling operation
 """
 function bilinear_upsample_workhorse(img, ilow1, ihigh1, wdiff1, ilow2, ihigh2_r, wdiff2)
-    if typeof(img) <: CuArray
-        wdiff1 = CuArray(wdiff1)
-        wdiff2 = CuArray(wdiff2)
-    end
     imgupsampled = @view(img[ilow1,ilow2,:,:]) .* (1 .- wdiff1) .+ @view(img[ihigh1,ilow2,:,:]) .* wdiff1
     imgupsampled = imgupsampled .* (1 .- wdiff2) .+ @view(imgupsampled[:,ihigh2_r,:,:]) .* wdiff2
 end
@@ -151,8 +154,9 @@ end
 Creates arrays of interpolation indices and weights for the bilinear_upsample2d operation.
 """
-@nograd function setup_upsample(imgsize::NTuple{4,<:Integer}, imgdtype, k_upsample::NTuple{2,<:Real})
+@nograd function setup_upsample(img, k_upsample::NTuple{2,<:Real})
     n_dims = 4
+    imgsize = size(img)
     newsize = get_newsize(imgsize, k_upsample)
 
     # Create interpolation grids
     xq1 = construct_xq(imgsize[1], newsize[1])
@@ -159,13 +163,18 @@ end
     xq2 = construct_xq(imgsize[2], newsize[2])
 
     # Get linear interpolation lower- and upper index, and weights
-    ilow1, ihigh1, wdiff1 = get_inds_and_ws(xq1, 1, n_dims)
-    ilow2, ihigh2, wdiff2 = get_inds_and_ws(xq2, 2, n_dims)
+    ilow1, ihigh1, wdiff1 = get_inds_and_ws(xq1, 1)
+    ilow2, ihigh2, wdiff2 = get_inds_and_ws(xq2, 2)
 
     # Adjust the upper interpolation indices of the second dimension
     ihigh2_r = adjoint_of_idx(ilow2)[ihigh2]
 
-    wdiff1 = imgdtype.(wdiff1)
-    wdiff2 = imgdtype.(wdiff2)
+    wdiff1 = eltype(img).(wdiff1)
+    wdiff2 = eltype(img).(wdiff2)
+
+    if typeof(img) <: CuArray
+        wdiff1 = CuArray(wdiff1)
+        wdiff2 = CuArray(wdiff2)
+    end
 
     return ilow1, ihigh1, wdiff1, ilow2, ihigh2, wdiff2, ihigh2_r

From 0a652680a3e58482e1160d26044a5fb9bbcf04fc Mon Sep 17 00:00:00 2001
From: Unknown
Date: Thu, 21 May 2020 14:10:42 +0200
Subject: [PATCH 12/12] Fixed some type-instabilities in adjoint

The only type-instability left is that the weights of Flux.Conv are not
type stable.
---
 src/layers/upsample.jl | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/layers/upsample.jl b/src/layers/upsample.jl
index e607882761..789e4a63d2 100644
--- a/src/layers/upsample.jl
+++ b/src/layers/upsample.jl
@@ -234,19 +234,19 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
         factors = (factors[1], 1)
     end
 
-    if size(arr)[1:2] == (1,1)
+    if (size(arr,1) == 1) & (size(arr,2) == 1)
         ds_arr = arr
         return ds_arr
     end
 
-    n_chan, n_batch = size(arr)[3:4]
+    n_chan, n_batch = size(arr,3), size(arr,4)
 
     kern1 = get_downsamplekernel(factors[1])
     kern2 = get_downsamplekernel(factors[2])
     kern = kern1 .* kern2'
 
     kern_sizes = size(kern)
-    pads = tuple((Int.(floor(factor//2)) for factor in factors)...)
+    pads = (floor(Int, factors[1]//2), floor(Int, factors[2]//2))
     strides = factors
 
     conv_ds = Conv(kern_sizes, n_chan=>n_chan, pad=pads, stride=strides)
@@ -265,7 +265,8 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
 
     # Still have to fix edge effects due to zero-padding of convolution,
    # TODO: Could be circumvented by having padding that just extrapolates the value at the first/last index
-    nextras = tuple((Int.(floor(factor//2)) for factor in factors)...)
+    # nextras = tuple((Int.(floor(factor//2)) for factor in factors)...)
+    nextras = (floor(Int, factors[1]//2), floor(Int, factors[2]//2))
 
     # First dimension edge-effect correction
     if nextras[1] > 0
@@ -278,7 +279,7 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
         end
         conv_extra1.bias .*= 0
 
-        if arr isa CuArray
+        if typeof(arr) <: CuArray
             conv_extra1 = gpu(conv_extra1)
         end
 
@@ -298,7 +299,7 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
         end
         conv_extra2.bias .*= 0
 
-        if arr isa CuArray
+        if typeof(arr) <: CuArray
             conv_extra2 = gpu(conv_extra2)
         end
 
@@ -308,8 +309,8 @@ function bilinear_upsample_adjoint(arr::AbstractArray, factors::Tuple{T,T} where T<:Integer)
     end
 
     # Finally fix four corners if needed
-    kern = eltype(arr).(kern)
-    if arr isa CuArray
+    # kern = eltype(arr).(kern)
+    if typeof(arr) <: CuArray
         kern = gpu(kern)
     end
     n1, n2 = nextras