From 6fa8b76bbb37f2e3b773dfc602d336287fed8600 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Sun, 24 Feb 2019 13:59:27 +0530 Subject: [PATCH] Add Upsampling GPU Kernel --- src/dnn/CUDNN.jl | 1 + src/dnn/upsample.jl | 59 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 src/dnn/upsample.jl diff --git a/src/dnn/CUDNN.jl b/src/dnn/CUDNN.jl index 61fc0959..ad3664ec 100644 --- a/src/dnn/CUDNN.jl +++ b/src/dnn/CUDNN.jl @@ -30,6 +30,7 @@ end include("libcudnn.jl") include("helpers.jl") include("nnlib.jl") +include("upsample.jl") version() = VersionNumber(cudnnGetProperty(CUDAapi.MAJOR_VERSION), cudnnGetProperty(CUDAapi.MINOR_VERSION), diff --git a/src/dnn/upsample.jl b/src/dnn/upsample.jl new file mode 100644 index 00000000..2433edf4 --- /dev/null +++ b/src/dnn/upsample.jl @@ -0,0 +1,59 @@ +function upsample_kernel(state, y, x, height, width, channels, batch, stride, scale) + i = @linearidx y state + + y_idx = i + y_h = (i - 1) % (height * stride[1]) + 1 + i = (i - 1) ÷ (height * stride[1]) + y_w = i % (width * stride[2]) + 1 + i = i ÷ (width * stride[2]) + y_c = i % channels + 1 + i = i ÷ channels + y_b = i % batch + 1 + + x_h = (y_h - 1) ÷ stride[1] + 1 + x_w = (y_w - 1) ÷ stride[2] + 1 + x_c = y_c + x_idx = (y_b - 1) * width * height * channels + (x_c - 1) * width * height + (x_w - 1) * height + x_h + + @inbounds y[y_idx] = scale * x[x_idx] + + return nothing +end + +function upsample(x::CuArray, stride, scale = 1) + (height, width, channels, batch) = size(x) + y = similar(x, (height * stride[1], width * stride[2], channels, batch)) + gpu_call(upsample_kernel, y, (y, x, height, width, channels, batch, stride, scale)) + return y +end + +function ∇upsample_kernel(state, y, x, height, width, channels, batch, stride, scale) + i = @linearidx y state + + y_idx = i + y_h = (i - 1) % (height * stride[1]) + 1 + i = (i - 1) ÷ (height * stride[1]) + y_w = i % (width * stride[2]) + 1 + i = i ÷ (width * stride[2]) + y_c = i % channels + 1 + i = i ÷ channels + y_b = i % batch + 1 + + x_h = (y_h - 1) ÷ stride[1] + 1 + x_w = (y_w - 1) ÷ stride[2] + 1 + x_c = y_c + x_idx = (y_b - 1) * width * height * channels + (x_c - 1) * width * height + (x_w - 1) * height + x_h + + @inbounds x[x_idx] += y[y_idx] / scale + + return nothing +end + +function ∇upsample(dy::CuArray, stride, scale = 1) + (height, width, channels, batch) = size(dy) + @assert height % stride[1] == 0 + @assert width % stride[2] == 0 + dx = similar(dy, (height ÷ stride[1], width ÷ stride[2], channels, batch)) + gpu_call(∇upsample_kernel, dy, (dy, dx, height, width, channels, batch, stride, scale)) + return dx +end