add bilinear upsampling #262

Merged: 12 commits, Jan 8, 2021
Changes from 9 commits
3 changes: 2 additions & 1 deletion Project.toml
@@ -15,6 +15,7 @@ Requires = "0.5, 1.0"
julia = "1.3"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -23,4 +24,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["ChainRulesTestUtils", "FiniteDifferences", "Random", "StableRNGs", "Test", "Zygote"]
test = ["ChainRulesTestUtils", "CUDA", "FiniteDifferences", "Random", "StableRNGs", "Test", "Zygote"]
3 changes: 1 addition & 2 deletions src/NNlib.jl
@@ -24,14 +24,13 @@ is_nnpack_available() = false
end

include("activations.jl")

include("softmax.jl")
include("misc.jl")
include("batched/batchedmul.jl")
include("gemm.jl")
include("conv.jl")
include("conv_bias_act.jl")
include("pooling.jl")
include("upsample.jl")

## Include implementations
include("impl/padding_edges.jl")
25 changes: 0 additions & 25 deletions src/misc.jl

This file was deleted.

242 changes: 242 additions & 0 deletions src/upsample.jl
@@ -0,0 +1,242 @@
export bilinear_upsample, ∇bilinear_upsample, pixel_shuffle

"""
bilinear_upsample(x::AbstractArray{<:Number,4}, k::NTuple{2,Int})

Upsamples the first 2 dimensions of the array `x` by the upsample factors stored in `k`,
using bilinear interpolation.

The size of the output is equal to
`(k[1]*S1, k[2]*S2, S3, S4)`, where `S1, S2, S3, S4 = size(x)`.

The interpolation grid is identical to the one used by `imresize` from `Images.jl`.

Currently only 2d upsampling is supported.
"""
function bilinear_upsample(x::AbstractArray{<:Number,4}, k::NTuple{2,Int})
# This function is gpu friendly

imgsize = size(x)
newsize = get_newsize(imgsize, k)

# Get linear interpolation lower- and upper index, and weights
ilow1, ihigh1, wdiff1 = get_inds_and_ws(x, imgsize[1], newsize[1], 1)
ilow2, ihigh2, wdiff2 = get_inds_and_ws(x, imgsize[2], newsize[2], 2)

# Adjust the upper interpolation indices of the second dimension
ihigh2_r = adjoint_of_idx(ilow2)[ihigh2]

@inbounds y = @view(x[ilow1,ilow2,:,:]) .* (1 .- wdiff1) .+ @view(x[ihigh1,ilow2,:,:]) .* wdiff1
@inbounds y .= y .* (1 .- wdiff2) .+ y[:,ihigh2_r,:,:] .* wdiff2
# @inbounds y = y .* (1 .- wdiff2) .+ @view(y[:,ihigh2_r,:,:]) .* wdiff2 # equivalent to line above
return y
end
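A minimal usage sketch (editor illustration, not part of the diff; it assumes the merged NNlib exports `bilinear_upsample`, as the export line above indicates):

```julia
using NNlib: bilinear_upsample

x = reshape(Float32[1 2; 3 4], 2, 2, 1, 1)  # WHCN layout, 2×2 spatial grid
y = bilinear_upsample(x, (3, 2))            # scale dim 1 by 3 and dim 2 by 2
size(y)                                     # (6, 4, 1, 1)
```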

function get_inds_and_ws(x::T, n::Int, m::Int, dim::Int) where T <: AbstractArray
# Creates the interpolation grid for resampling,
# identical to the grid used by `imresize` in Images.jl.
step = n // m
offset = (n + 1)//2 - step//2 - step * (m//2 - 1)
xq = clamp.(range(offset, step=step, length=m), 1, n)

# Creates interpolation lower and upper indices, and broadcastable weights
ilow = floor.(Int, xq)
ihigh = ceil.(Int, xq)
sizew = ntuple(i-> i == dim ? length(xq) : 1, ndims(x))
wdiff = convert(T, reshape(xq .- ilow, sizew)) # wdiff possibly lives on gpu
return ilow, ihigh, wdiff
end
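For intuition, here is a hand-traced run of the grid construction above for `n = 2` source rows upsampled to `m = 6` rows (the same case as the tests below); the values are derived from the formulas in the function and are illustrative only:

```julia
n, m = 2, 6
step = n // m                                       # 1//3
offset = (n + 1)//2 - step//2 - step * (m//2 - 1)   # 2//3
xq = clamp.(range(offset, step=step, length=m), 1, n)
# xq == [1, 1, 4//3, 5//3, 2, 2]  -- sample positions in source coordinates
ilow, ihigh = floor.(Int, xq), ceil.(Int, xq)       # [1,1,1,1,2,2], [1,1,2,2,2,2]
wdiff = xq .- ilow                                  # [0, 0, 1//3, 2//3, 0, 0]
```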

"""
adjoint_of_idx(idx::Vector{Int})

# Arguments
- `idx`: a vector of indices from which you want the adjoint.

# Outputs
- `idx_adjoint`: the index vector that inverts the operation `x[idx]`.

# Explanation
Determines the adjoint of the vector of indices `idx`, based on the following assumptions:
* `idx[1] == 1`
* `all(d in [0,1] for d in diff(idx))`
The adjoint of `idx` can be seen as an inverse operation such that:

```julia
x = [1, 2, 3, 4, 5]
idx = [1, 2, 2, 3, 4, 4, 5]
idx_adjoint = adjoint_of_idx(idx)
@assert x[idx][idx_adjoint] == x
```
The above holds as long as `idx` contains every index in `x`.
"""
function adjoint_of_idx(idx::Vector{Int})
d = trues(length(idx))
d[2:end] .= diff(idx)
idx_adjoint = findall(d)
return idx_adjoint
end

function get_newsize(sz, k)
return ntuple(i -> i <= length(k) ? sz[i]*k[i] : sz[i], length(sz))
end


"""
∇bilinear_upsample(Δ::AbstractArray{<:Number,4}, k::NTuple{2,Int})

# Arguments
- `Δ`: array that has been upsampled using the upsample factors in `k`

# Outputs
- `dx`: downsampled version of `Δ`

# Explanation

Custom adjoint for [`bilinear_upsample`](@ref).
The adjoint of upsampling is a downsampling operation, which
in this implementation is performed using `NNlib.conv` in combination with a downsampling kernel based on the
upsampling factors. Because of the zero-padding used by the convolution, the values at the boundary are polluted by edge effects,
which are corrected manually.
"""
function ∇bilinear_upsample(Δ::AbstractArray{<:Number, 4}, k::NTuple{2,Int})
# This function is gpu friendly

# Handle some corner cases more efficiently
if size(Δ, 1) == k[1]
Δ = sum(Δ, dims=1)
k = (1, k[2])
end
if size(Δ, 2) == k[2]
Δ = sum(Δ, dims=2)
k = (k[1], 1)
end
if (size(Δ, 1) == 1) && (size(Δ, 2) == 1)
dx = Δ
return dx
end

n_chan, n_batch = size(Δ, 3), size(Δ, 4)

kern1 = get_downsamplekernel(Δ, k[1])
kern2 = get_downsamplekernel(Δ, k[2])
kern = kern1 * kern2'

pad = (floor(Int, k[1]//2), floor(Int, k[2]//2))
stride = k

weight = cat(fill(kern, n_chan)..., dims=(3,4))
dx = conv(Δ, weight, pad=pad, stride=stride)

# Still have to fix edge effects due to the zero-padding of the convolution.
# TODO: Could be circumvented by having padding that just extrapolates the value at the first/last index
# nextras = tuple((Int.(floor(factor//2)) for factor in k)...)
nextras = (floor(Int, k[1]//2), floor(Int, k[2]//2))

# First dimension edge-effect correction
if nextras[1] > 0
kern1 = kern[1:nextras[1],:]
pad1 = (0, pad[2])
stride1 = (1, stride[2])
weight1 = similar(Δ, eltype(Δ), (size(kern1)..., n_chan, n_chan))
weight1 .= 0
for i in 1:n_chan
weight1[:,:,i,i] .= kern1
end

dx[[1],:,:,:] .+= conv(Δ[1:nextras[1],:,:,:], weight1, pad=pad1, stride=stride1)
weight1 .= weight1[end:-1:1,:,:,:]
dx[[end],:,:,:] .+= conv(Δ[end-nextras[1]+1:end,:,:,:], weight1, pad=pad1, stride=stride1)

## Conv with views is not dispatched to CUDA.conv
# dx[[1],:,:,:] .+= conv(@view(Δ[1:nextras[1],:,:,:]), weight1, pad=pad1, stride=stride1)
# weight1 .= @view(weight1[end:-1:1,:,:,:])
# dx[[end],:,:,:] .+= conv(@view(Δ[end-nextras[1]+1:end,:,:,:]), weight1, pad=pad1, stride=stride1)
end

# Second dimension edge-effect correction
if nextras[2] > 0
kern2 = kern[:,1:nextras[2]]
pad2 = (pad[1], 0)
stride2 = (stride[1], 1)
weight2 = similar(Δ, eltype(Δ), (size(kern2)..., n_chan, n_chan))
weight2 .= 0
for i in 1:n_chan
weight2[:,:,i,i] .= kern2
end

dx[:,[1],:,:] .+= conv(Δ[:,1:nextras[2],:,:], weight2, pad=pad2, stride=stride2)
weight2 .= weight2[:,end:-1:1,:,:]
dx[:,[end],:,:] .+= conv(Δ[:,end-nextras[2]+1:end,:,:], weight2, pad=pad2, stride=stride2)

## Conv with views is not dispatched to CUDA.conv
# dx[:,[1],:,:] .+= conv(@view(Δ[:,1:nextras[2],:,:]), weight2, pad=pad2, stride=stride2)
# weight2 .= @view(weight2[:,end:-1:1,:,:])
# dx[:,[end],:,:] .+= conv(@view(Δ[:,end-nextras[2]+1:end,:,:]), weight2, pad=pad2, stride=stride2)
end

## Finally fix four corners if needed
n1, n2 = nextras
if (n1 > 0) & (n2 > 0)
dx[1,1,:,:] .+= sum(kern[1:n1,1:n2] .* @view(Δ[1:n1,1:n2,:,:]), dims=(1,2))[1,1,:,:]
dx[1,end,:,:] .+= sum(kern[1:n1,end-n2+1:end] .* @view(Δ[1:n1,end-n2+1:end,:,:]), dims=(1,2))[1,1,:,:]
dx[end,end,:,:] .+= sum(kern[end-n1+1:end,end-n2+1:end] .* @view(Δ[end-n1+1:end,end-n2+1:end,:,:]), dims=(1,2))[1,1,:,:]
dx[end,1,:,:] .+= sum(kern[end-n1+1:end,1:n2] .* @view(Δ[end-n1+1:end,1:n2,:,:]), dims=(1,2))[1,1,:,:]
end

return dx
end
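A rough shape check of the adjoint (illustrative sketch assuming the definitions in this file are loaded):

```julia
Δ = rand(Float32, 6, 4, 1, 1)        # cotangent w.r.t. the upsampled output
dx = ∇bilinear_upsample(Δ, (3, 2))   # maps back onto the input grid
size(dx)                             # (2, 2, 1, 1)
```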

# `n`: the upsample factor for which a downsampling kernel is determined.
# `Δ` is passed only so the kernel can be converted to the same array type (e.g. a GPU array).
function get_downsamplekernel(Δ, n::Int)
step = 1//n
if n % 2 == 0
start = step//2
upward = collect(start:step:1//1)
kernel = [upward; reverse(upward)]
else
start = step
upward = collect(start:step:1//1)
kernel = [upward; reverse(upward[1:end-1])]
end
# TODO there must be a more convenient way to send to gpu
kernel = convert(typeof(Δ), reshape(kernel, length(kernel), 1, 1, 1))
kernel = dropdims(kernel, dims=(2,3,4))
return kernel
end
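For reference, the kernels the two branches produce for small factors, hand-computed from the code above (`Δ` only determines the output array type here):

```julia
Δ = zeros(Float32, 1, 1, 1, 1)
get_downsamplekernel(Δ, 2)  # ≈ [0.25, 0.75, 0.75, 0.25]    (even factor)
get_downsamplekernel(Δ, 3)  # ≈ [1/3, 2/3, 1.0, 2/3, 1/3]   (odd factor)
```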

function ChainRulesCore.rrule(::typeof(bilinear_upsample), x, k)
Ω = bilinear_upsample(x, k)
function bilinear_upsample_pullback(Δ)
(NO_FIELDS, ∇bilinear_upsample(Δ, k), DoesNotExist())
end
return Ω, bilinear_upsample_pullback
end
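With this rule in place, AD packages that consume ChainRulesCore rules (such as Zygote) should route the pullback through `∇bilinear_upsample`; a rough sketch:

```julia
using Zygote

x = rand(Float32, 2, 2, 1, 1)
g = Zygote.gradient(x -> sum(bilinear_upsample(x, (2, 2))), x)[1]
size(g) == size(x)  # true
```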


"""
pixel_shuffle(x, r)

Pixel shuffling operation, where `r` is the upscale factor.
The operation converts an input of size [W, H, r²C, N] to size [rW, rH, C, N].
It is used extensively in super-resolution networks to upsample
towards high-resolution features.

Reference: https://arxiv.org/pdf/1609.05158.pdf
"""
function pixel_shuffle(x::AbstractArray, r::Integer)
@assert ndims(x) > 2
d = ndims(x) - 2
sizein = size(x)[1:d]
cin, n = size(x, d+1), size(x, d+2)
@assert cin % r^d == 0
cout = cin ÷ r^d
# x = reshape(x, sizein..., fill(r, d)..., cout, n) # bug https://github.com/FluxML/Zygote.jl/issues/866
x = reshape(x, sizein..., ntuple(i->r, d)..., cout, n)
perm = [d+1:2d 1:d]' |> vec # = [d+1, 1, d+2, 2, ..., 2d, d]
x = permutedims(x, (perm..., 2d+1, 2d+2))
return reshape(x, ((r .* sizein)..., cout, n))
end
6 changes: 4 additions & 2 deletions test/runtests.jl
@@ -6,6 +6,8 @@ using FiniteDifferences: FiniteDifferenceMethod, central_fdm
import Zygote
using Zygote: gradient
using StableRNGs
using CUDA
CUDA.allowscalar(false)

const rng = StableRNG(123)

@@ -36,6 +38,6 @@ end
include("softmax.jl")
end

@testset "Misc Stuff" begin
include("misc.jl")
@testset "Upsampling" begin
include("upsample.jl")
end
30 changes: 30 additions & 0 deletions test/misc.jl → test/upsample.jl
@@ -1,3 +1,32 @@
@testset "bilinear_upsample 2d" begin
x = reshape(Float32[1. 2.; 3. 4.], (2,2,1,1))
y_true = [1//1 5//4 7//4 2//1;
1//1 5//4 7//4 2//1;
5//3 23//12 29//12 8//3;
7//3 31//12 37//12 10//3;
3//1 13//4 15//4 4//1;
3//1 13//4 15//4 4//1][:,:,:,:]

y = bilinear_upsample(x, (3, 2))
@test size(y) == size(y_true)
@test eltype(y) == Float32
@test y ≈ y_true

gradtest(x->bilinear_upsample(x, (3, 2)), x, atol=1e-4)

if CUDA.has_cuda()
y = bilinear_upsample(x |> cu, (3, 2))
@test y isa CuArray
@test Array(y) ≈ y_true
g_gpu = Zygote.gradient(x -> sum(sin.(bilinear_upsample(x, (3, 2)))), x |> cu)[1]
@test g_gpu isa CuArray
g_cpu = Zygote.gradient(x -> sum(sin.(bilinear_upsample(x, (3, 2)))), x)[1]
@test Array(g_gpu) ≈ g_cpu atol=1e-4
end
end

@testset "pixel_shuffle" begin
x = reshape(1:16, (2, 2, 4, 1))
# [:, :, 1, 1] =
@@ -61,3 +90,4 @@
gradtest(x -> pixel_shuffle(x, r), x)
end
end