  @test gradient(x -> sum(cpu(x)), gpu(rand(3,3))) isa Tuple
end

-# TODO: These layers get into scalar indexing issues.
-const BROKEN_LAYERS = Union{}

-const ACTIVATIONS = [identity, relu, tanh,
-                     sigmoid, exp, softplus,
-                     elu, selu]
+const ACTIVATIONS = [identity, tanh]

-function gpu_gradtest(name::String, layers::Vector, x_cpu = nothing, args...; test_cpu = true, test_mode = false)
-  isnothing(x_cpu) && error("Missing input to test the layers against.")
+function gpu_gradtest(name::String, layers::Vector, x_cpu, args...;
+                      test_mode=false, test_grad_x=true,
+                      atol=1e-4, rtol=1e-4)
  @testset "$name GPU grad tests" begin
    for layer in layers
      @testset "$layer Layer GPU grad test" begin

        # compute output and grad of parameters
        l_cpu = layer(args...)
-        l_gpu = l_cpu |> gpu
        if test_mode
          testmode!(l_cpu)
-          testmode!(l_gpu)
        end

-        ps_cpu = Flux.params(l_cpu)
-        y_cpu, back_cpu = pullback(() -> sum(l_cpu(x_cpu)), ps_cpu)
-        gs_cpu = back_cpu(1f0)
-
-        x_gpu = gpu(x_cpu)
-        ps_gpu = Flux.params(l_gpu)
-
-        if typeof(l_gpu) <: BROKEN_LAYERS
-          @test_broken gradient(() -> sum(l_gpu(x_gpu)), ps_gpu) isa Flux.Zygote.Grads
-        else
-          y_gpu, back_gpu = pullback(() -> sum(l_gpu(x_gpu)), ps_gpu)
-          gs_gpu = back_gpu(1f0) # TODO many layers error out when backprop int 1, should fix
-
-          # compute grad of input
-          xg_cpu = gradient(x -> sum(l_cpu(x)), x_cpu)[1]
-          xg_gpu = gradient(x -> sum(l_gpu(x)), x_gpu)[1]
-
-          # test
-          if test_cpu
-            if layer === GroupedConvTranspose
-              @test y_gpu ≈ y_cpu rtol=1f-2 atol=1f-3
-            else
-              @test y_gpu ≈ y_cpu rtol=1f-3 atol=1f-3
-            end
-            if isnothing(xg_cpu)
-              @test isnothing(xg_gpu)
-            else
-              if layer === GroupedConvTranspose
-                @test Array(xg_gpu) ≈ xg_cpu rtol=2f-2 atol=1f-3
-              else
-                @test Array(xg_gpu) ≈ xg_cpu rtol=1f-3 atol=1f-3
-              end
-            end
-          end
-          @test gs_gpu isa Flux.Zygote.Grads
-          for (p_cpu, p_gpu) in zip(ps_cpu, ps_gpu)
-            if isnothing(gs_cpu[p_cpu])
-              @test isnothing(gs_gpu[p_gpu])
-            else
-              @test gs_gpu[p_gpu] isa CuArray
-              if test_cpu
-                @test Array(gs_gpu[p_gpu]) ≈ gs_cpu[p_cpu] rtol=1f-3 atol=1f-3
-              end
-            end
-          end
-        end
+        test_gradients(l_cpu, x_cpu; test_gpu=true, compare_finite_diff=false, test_grad_x, atol, rtol)
      end
    end
  end
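The hand-rolled CPU/GPU comparison above is collapsed into a single call to `test_gradients`, a helper from Flux's test utilities whose internals are not shown in this diff. A minimal sketch of the comparison it is assumed to perform, with an illustrative helper name and tolerances, would be:

    using Flux, Test

    # Hypothetical stand-in for the CPU-vs-GPU part of `test_gradients`:
    # differentiate the same scalar loss on both devices, then compare
    # parameter gradients after moving the GPU result back to the host.
    function compare_cpu_gpu_grads(l_cpu, x_cpu; atol=1e-4, rtol=1e-4)
        l_gpu, x_gpu = gpu(l_cpu), gpu(x_cpu)
        g_cpu = gradient(l -> sum(l(x_cpu)), l_cpu)[1]
        g_gpu = gradient(l -> sum(l(x_gpu)), l_gpu)[1]
        # compare one representative parameter gradient
        @test Array(g_gpu.weight) ≈ g_cpu.weight atol=atol rtol=rtol
    end

    compare_cpu_gpu_grads(Dense(3 => 2), rand(Float32, 3, 4))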
@@ -97,23 +47,24 @@ for act in ACTIVATIONS
                 ConvTranspose, ConvTransposeNoBias,
                 CrossCor, CrossCorNoBias,
                 DepthwiseConv, DepthwiseConvNoBias]
-  gpu_gradtest("Convolution with $act", conv_layers, r, (2,2), 1 => 3, act, test_cpu = false)
+  gpu_gradtest("Convolution with $act", conv_layers, r, (2,2), 1 => 3, act)

  groupedconv = [GroupedConv, GroupedConvTranspose]
-  gpu_gradtest("GroupedConvolution with $act", groupedconv, rand(Float32, 28, 28, 100, 2), (3,3), 100 => 25, act, test_cpu = true)
+  gpu_gradtest("GroupedConvolution with $act", groupedconv, rand(Float32, 28, 28, 100, 2), (3,3), 100 => 25, act)

  batch_norm = [BatchNorm, BatchNormNoTrackStats]
-  gpu_gradtest("BatchNorm 1 with $act", batch_norm, rand(Float32, 28,28,3,4), 3, act, test_cpu = false) # TODO fix errors
-  gpu_gradtest("BatchNorm 2 with $act", batch_norm, rand(Float32, 5,4), 5, act, test_cpu = true)
+  gpu_gradtest("BatchNorm 1 with $act", batch_norm, rand(Float32, 28,28,3,4), 3, act, atol=1e-3)
+  gpu_gradtest("BatchNorm 2 with $act", batch_norm, rand(Float32, 5,4), 5, act, atol=1e-3)

  batch_norm = [BatchNormNoTrackStats]
-  gpu_gradtest("BatchNorm 3 with $act (test mode)", batch_norm, rand(Float32, 5,4), 5, act, test_cpu = true, test_mode = true)
+  gpu_gradtest("BatchNorm 3 with $act (test mode)", batch_norm, rand(Float32, 5,4), 5, act,
+               test_mode=true, atol=1e-3)

  instancenorm = [InstanceNorm]
-  gpu_gradtest("InstanceNorm with $act", instancenorm, r, 1, act, test_cpu = false)
+  gpu_gradtest("InstanceNorm with $act", instancenorm, r, 1, act)

  groupnorm = [GroupNorm]
-  gpu_gradtest("GroupNorm with $act", groupnorm, rand(Float32, 28,28,3,1), 3, 1, act, test_cpu = false)
+  gpu_gradtest("GroupNorm with $act", groupnorm, rand(Float32, 28,28,3,1), 3, 1, act)
end

r = rand(Float32, 28, 28, 1, 1)
@@ -122,13 +73,13 @@ pooling_layers = [MaxPool, MeanPool]
gpu_gradtest("Pooling", pooling_layers, r, (2,2))

adaptive_pooling_layers = [AdaptiveMaxPool, AdaptiveMeanPool]
-gpu_gradtest("AdaptivePooling", adaptive_pooling_layers, r, (7,7), test_cpu = false)
+gpu_gradtest("AdaptivePooling", adaptive_pooling_layers, r, (7,7))

dropout_layers = [Dropout, AlphaDropout]
-gpu_gradtest("Dropout", dropout_layers, r, 0.5f0; test_cpu = false) # dropout is not deterministic
+gpu_gradtest("Dropout", dropout_layers, r, 1e-6) # dropout is not deterministic

layer_norm = [LayerNorm]
-gpu_gradtest("LayerNorm 1", layer_norm, rand(Float32, 28,28,3,4), 28, test_cpu = false) # TODO fix errors
+gpu_gradtest("LayerNorm 1", layer_norm, rand(Float32, 28,28,3,4), 28)
gpu_gradtest("LayerNorm 2", layer_norm, rand(Float32, 5,4), 5)

upsample = [x -> Upsample(scale=x)]
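Note the Dropout change above: the drop probability goes from 0.5f0 with `test_cpu = false` to 1e-6, because `test_gradients` no longer has a switch to skip the CPU comparison, while the CPU and GPU backends draw different random masks. With a negligible probability the layer is almost surely the identity, so the two devices agree. Illustratively (assuming `trainmode!` to force dropout outside a gradient call):

    using Flux
    d = Dropout(1e-6)
    Flux.trainmode!(d)
    x = rand(Float32, 28, 28, 1, 1)
    d(x) ≈ x   # true unless one of the 784 entries was dropped,
               # which happens with probability ≈ 784 * 1e-6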
@@ -140,32 +91,27 @@ gpu_gradtest("PixelShuffle 2d", pixelshuffle, rand(Float32, 3, 4, 18, 3), 3)
gpu_gradtest("PixelShuffle 1d", pixelshuffle, rand(Float32, 3, 18, 3), 3)

embedding = [Flux.Embedding]
-gpu_gradtest("Embedding", embedding, [1,3,5], 5, 2)
-gpu_gradtest("Embedding repeated indices", embedding, [1,3,5,3], 5, 2)
-gpu_gradtest("Embedding integer index", embedding, 1, 5, 2)
-gpu_gradtest("Embedding 2d index", embedding, [1 2; 3 4], 5, 2)
-gpu_gradtest("Embedding OneHotVec index", embedding, OneHotVector(1, 5), 5, 2)
-gpu_gradtest("Embedding OneHotMatrix index", embedding, OneHotMatrix([1,2,3], 5), 5, 2)
-gpu_gradtest("Embedding OneHotMatrix repeated indices", embedding, OneHotMatrix([1,2,2], 5), 5, 2)
+gpu_gradtest("Embedding", embedding, [1,3,5], 5, 2, test_grad_x = false)
+gpu_gradtest("Embedding repeated indices", embedding, [1,3,5,3], 5, 2, test_grad_x = false)
+gpu_gradtest("Embedding integer index", embedding, 1, 5, 2, test_grad_x = false)
+gpu_gradtest("Embedding 2d index", embedding, [1 2; 3 4], 5, 2, test_grad_x = false)
+gpu_gradtest("Embedding OneHotVec index", embedding, OneHotVector(1, 5), 5, 2, test_grad_x = false)
+gpu_gradtest("Embedding OneHotMatrix index", embedding, OneHotMatrix([1,2,3], 5), 5, 2, test_grad_x = false)
+gpu_gradtest("Embedding OneHotMatrix repeated indices", embedding, OneHotMatrix([1,2,2], 5), 5, 2, test_grad_x = false)

@testset "function layers" begin
-  x = rand(Float32, 3,3)
-  gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=1)), x)
-  gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=2)), x)
-  gpu_autodiff_test(x -> sum(Flux.normalise(x)), x)
+  x = rand(Float32, 3, 3)
+  test_gradients(x -> sum(Flux.normalise(x; dims=1)), x, test_gpu = true, compare_finite_diff = false)
+  test_gradients(x -> sum(Flux.normalise(x; dims=2)), x, test_gpu = true, compare_finite_diff = false)
+  test_gradients(x -> sum(Flux.normalise(x)), x, test_gpu = true, compare_finite_diff = false)
end
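`Flux.normalise` standardises an array to zero mean and (up to a small eps in the denominator) unit standard deviation along the given dims, so each line above differentiates a scalar reduction of a whitened array. A quick check:

    using Flux, Statistics
    x = Float32[1 2; 3 4]
    y = Flux.normalise(x; dims=1)
    mean(y; dims=1)                    # ≈ [0 0]
    std(y; dims=1, corrected=false)    # ≈ [1 1]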

@testset "Zeros mapped for $cl" for cl in (Conv, ConvTranspose, CrossCor, DepthwiseConv)
  l = cl((2,2), 1 => 3, bias = false) |> gpu
  ip = zeros(Float32, 28,28,1,1) |> gpu
-  if typeof(l) <: BROKEN_LAYERS
-    @test_broken sum(l(ip)) ≈ 0.f0
-    @test_broken gradient(() -> sum(l(ip)), Flux.params(l)) isa Flux.Zygote.Grads
-  else
-    @test sum(l(ip)) ≈ 0.f0
-    gs = gradient(() -> sum(l(ip)), Flux.params(l))
-    @test l.bias ∉ gs.params
-  end
+  @test sum(l(ip)) ≈ 0.f0
+  gs = gradient(() -> sum(l(ip)), Flux.params(l))
+  @test l.bias ∉ gs.params
end

@testset "Dense without bias" begin
@@ -366,14 +312,6 @@
  @test Array(y_gpu) ≈ y_cpu atol=1e-4
  @test Array(α_gpu) ≈ α_cpu atol=1e-4

-  gm_cpu, gx_cpu = gradient(mha_cpu, x_cpu) do mha, x
-    y, α = mha(x)
-    return sum(y.^2) + sum(α.^2)
-  end
-  gm_gpu, gx_gpu = gradient(mha_gpu, x_gpu) do mha, x
-    y, α = mha(x)
-    return sum(y.^2) + sum(α.^2)
-  end
-  check_grad(gm_gpu, gm_cpu)
-  check_grad(gx_gpu, gx_cpu)
+  test_gradients(mha_cpu, x_cpu, loss = o -> sum(o[1].^2) + sum(o[2].^2),
+                 test_gpu=true, compare_finite_diff=false)
end
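`MultiHeadAttention` returns a tuple `(y, α)` of output and attention scores, so `test_gradients` is given a custom `loss` that maps that tuple to a scalar; the keyword presumably replaces the two explicit `gradient ... do` blocks and the `check_grad` calls. A manual equivalent would be along the lines of:

    loss(o) = sum(o[1].^2) + sum(o[2].^2)
    g_cpu = gradient(m -> loss(m(x_cpu)), mha_cpu)[1]
    g_gpu = gradient(m -> loss(m(x_gpu)), mha_gpu)[1]
    # ...then compare g_gpu, moved back to the CPU, against g_cpu field by field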