Skip to content

Commit c3cc520

Browse files
authored
Allow regular convolution for AMDGPU (#473)
* Do not error on regular convolutions * Add regular convolution test * Update docs * Flip kernel automatically
1 parent c513529 commit c3cc520

File tree

4 files changed

+53
-8
lines changed

4 files changed

+53
-8
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
1818
AMDGPUExt = "AMDGPU"
1919

2020
[compat]
21-
AMDGPU = "0.4.7"
21+
AMDGPU = "0.4.8"
2222
Adapt = "2, 3.2"
2323
ChainRulesCore = "1.13"
2424
Requires = "0.5, 1.0"

docs/src/reference.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ pad_zeros
7676

7777
`Flux`'s `Conv` and `CrossCor` layers use `NNlib.DenseConvDims` and `NNlib.conv` internally.
7878

79+
!!! AMDGPU MIOpen supports only cross-correlation (flipkernel=true).
80+
Therefore for every regular convolution (flipkernel=false)
81+
the kernel is flipped before calculation.
82+
For better performance, use cross-correlation (flipkernel=true)
83+
and manually flip the kernel before calling `NNlib.conv`.
84+
`Flux` handles this automatically; this is only required for direct calls.
85+
7986
```@docs
8087
conv
8188
ConvDims

ext/AMDGPUExt/conv.jl

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,19 @@
11
function NNlib.conv!(
22
y::ROCArray{T, N}, x::ROCArray{T, N}, w::ROCArray{T, N}, cdims::DenseConvDims,
33
) where {T <: MIOPENFloat, N}
4-
NNlib.flipkernel(cdims) || throw(ArgumentError(
5-
"MIOpen supports only cross-correlation as its convolution implementation."))
4+
if !NNlib.flipkernel(cdims)
5+
@warn """
6+
MIOpen supports only cross-correlation (flipkernel=true).
7+
Therefore for every regular convolution (flipkernel=false)
8+
kernel is flipped before calculation.
9+
For better performance, use cross-correlation (flipkernel=true)
10+
and manually flip the kernel before `NNlib.conv` call.
11+
""" maxlog=1
12+
flip_dims = ntuple(
13+
i -> (i ≤ ndims(w) - 2) ? (size(w, i):-1:1) : Colon(),
14+
ndims(w))
15+
w = w[flip_dims...]
16+
end
617

718
nd = max(0, 4 - N)
819
ncdims = NNlib.insert_singleton_spatial_dimension(cdims, nd)
@@ -18,8 +29,19 @@ end
1829
function NNlib.∇conv_data!(
1930
dx::ROCArray{T, N}, dy::ROCArray{T, N}, w::ROCArray{T, N}, cdims::DenseConvDims,
2031
) where {T <: MIOPENFloat, N}
21-
NNlib.flipkernel(cdims) || throw(ArgumentError(
22-
"MIOpen supports only cross-correlation as its convolution implementation."))
32+
if !NNlib.flipkernel(cdims)
33+
@warn """
34+
MIOpen supports only cross-correlation (flipkernel=true).
35+
Therefore for every regular convolution (flipkernel=false)
36+
kernel is flipped before calculation.
37+
For better performance, use cross-correlation (flipkernel=true)
38+
and manually flip the kernel before `NNlib.conv` call.
39+
""" maxlog=1
40+
flip_dims = ntuple(
41+
i -> (i ≤ ndims(w) - 2) ? (size(w, i):-1:1) : Colon(),
42+
ndims(w))
43+
w = w[flip_dims...]
44+
end
2345

2446
nd = max(0, 4 - N)
2547
ncdims = NNlib.insert_singleton_spatial_dimension(cdims, nd)
@@ -35,9 +57,6 @@ end
3557
function NNlib.∇conv_filter!(
3658
dw::ROCArray{T, N}, x::ROCArray{T, N}, dy::ROCArray{T, N}, cdims::DenseConvDims,
3759
) where {T <: MIOPENFloat, N}
38-
NNlib.flipkernel(cdims) || throw(ArgumentError(
39-
"MIOpen supports only cross-correlation as its convolution implementation."))
40-
4160
nd = max(0, 4 - N)
4261
ncdims = NNlib.insert_singleton_spatial_dimension(cdims, nd)
4362
MIOpen.∇convolution_weight!(
@@ -46,5 +65,19 @@ function NNlib.∇conv_filter!(
4665
NNlib.insert_singleton_spatial_dimension(x, nd);
4766
padding=nnlib_padding(ncdims), stride=NNlib.stride(ncdims),
4867
dilation=NNlib.dilation(ncdims), groups=NNlib.groupcount(ncdims))
68+
69+
if !NNlib.flipkernel(cdims)
70+
@warn """
71+
MIOpen supports only cross-correlation (flipkernel=true).
72+
Therefore for every regular convolution (flipkernel=false)
73+
kernel is flipped before calculation.
74+
For better performance, use cross-correlation (flipkernel=true)
75+
and manually flip the kernel before `NNlib.conv` call.
76+
""" maxlog=1
77+
flip_dims = ntuple(
78+
i -> (i ≤ ndims(dw) - 2) ? (size(dw, i):-1:1) : Colon(),
79+
ndims(dw))
80+
dw = dw[flip_dims...]
81+
end
4982
return dw
5083
end

test/amd/conv.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
for T in (Float16, Float32), nd in (1, 2, 3)
44
x = rand(Float32, fill(4, nd)..., 3, 1)
55
w = rand(Float32, fill(2, nd)..., channels, 4)
6+
67
cdims = DenseConvDims(x, w, flipkernel=true)
78
gputest((x, w) -> NNlib.conv(x, w, cdims), x, w; atol=1e-4)
9+
10+
# Regular convolution: the kernel is flipped internally for AMDGPU.
11+
cdims = DenseConvDims(x, w)
12+
gputest((x, w) -> NNlib.conv(x, w, cdims), x, w; atol=1e-4)
813
end
914
end

0 commit comments

Comments
 (0)