@@ -65,34 +65,34 @@ const invalid_ir_error = using_cuda ? cuda_mod.InvalidIRError : ErrorException
65
65
# skipped.
66
66
function test_field_broadcast (;
67
67
test_name,
68
- get_result:: F1 ,
69
- set_result! :: F2 ,
70
- ref_set_result! :: F3 = nothing ,
68
+ get_result,
69
+ set_result,
70
+ ref_set_result = nothing ,
71
71
time_ratio_limit = 10 ,
72
72
max_eps_error_limit = 10 ,
73
73
test_broken_with_cuda = false ,
74
- ) where {F1, F2, F3}
74
+ )
75
75
@testset " $test_name " begin
76
76
if test_broken_with_cuda && using_cuda
77
- @test_throws invalid_ir_error get_result ( )
77
+ @test_throws invalid_ir_error materialize (get_result )
78
78
@warn " $test_name :\n\t CUDA.InvalidIRError"
79
79
return
80
80
end
81
81
82
- result = get_result ( )
82
+ result = materialize (get_result )
83
83
result_copy = copy (result)
84
- time = @benchmark set_result ! (result)
84
+ time = @benchmark materialize ! (result, set_result )
85
85
time_rounded = round (time; sigdigits = 2 )
86
86
87
87
# Test that set_result! sets the same value as get_result.
88
88
@test result == result_copy
89
89
90
- if isnothing (ref_set_result! )
90
+ if isnothing (ref_set_result)
91
91
@info " $test_name :\n\t Time = $time_rounded s (reference \
92
92
implementation unavailable)"
93
93
else
94
94
ref_result = similar (result)
95
- ref_time = @benchmark ref_set_result ! (ref_result)
95
+ ref_time = @benchmark materialize ! (ref_result, ref_set_result )
96
96
ref_time_rounded = round (ref_time; sigdigits = 2 )
97
97
time_ratio = time / ref_time
98
98
time_ratio_rounded = round (time_ratio; sigdigits = 2 )
@@ -109,101 +109,28 @@ function test_field_broadcast(;
109
109
\n\t Maximum Error = $max_eps_error eps"
110
110
111
111
# Test that set_result! is performant and correct when compared
112
- # against ref_set_result! .
112
+ # against ref_set_result.
113
113
@test time / ref_time <= time_ratio_limit
114
114
@test max_eps_error <= max_eps_error_limit
115
115
end
116
116
117
117
# Test get_result and set_result! for type instabilities, and test
118
118
# set_result! for allocations. Ignore the type instabilities in CUDA and
119
119
# the allocations they incur.
120
- @test_opt ignored_modules = cuda_frames get_result ( )
121
- @test_opt ignored_modules = cuda_frames set_result ! (result)
122
- using_cuda || @test (@allocated set_result ! (result)) == 0
120
+ @test_opt ignored_modules = cuda_frames materialize (get_result )
121
+ @test_opt ignored_modules = cuda_frames materialize ! (result, set_result )
122
+ using_cuda || @test (@allocated materialize ! (result, set_result )) == 0
123
123
124
- if ! isnothing (ref_set_result! )
124
+ if ! isnothing (ref_set_result)
125
125
# Test ref_set_result! for type instabilities and allocations to
126
126
# ensure that the performance comparison is fair.
127
- @test_opt ignored_modules = cuda_frames ref_set_result! (ref_result)
128
- using_cuda || @test (@allocated ref_set_result! (ref_result)) == 0
129
- end
130
- end
131
- end
132
-
133
- # Test the allocating and non-allocating versions of a field broadcast against
134
- # a reference array-based non-allocating implementation. Ensure that they are
135
- # performant, correct, and type-stable, and print some useful information. In
136
- # order for the input arrays and temporary scratch arrays used by the reference
137
- # implementation to be generated automatically, the corresponding fields must be
138
- # passed to this function.
139
- function test_field_broadcast_against_array_reference (;
140
- test_name,
141
- get_result:: F1 ,
142
- set_result!:: F2 ,
143
- input_fields,
144
- get_temp_value_fields = () -> (),
145
- ref_set_result!:: F3 ,
146
- time_ratio_limit = 10 ,
147
- max_eps_error_limit = 10 ,
148
- test_broken_with_cuda = false ,
149
- ) where {F1, F2, F3}
150
- @testset " $test_name " begin
151
- if test_broken_with_cuda && using_cuda
152
- @test_throws invalid_ir_error get_result ()
153
- @warn " $test_name :\n\t CUDA.InvalidIRError"
154
- return
155
- end
156
-
157
- result = get_result ()
158
- result_copy = copy (result)
159
- time = @benchmark set_result! (result)
160
- time_rounded = round (time; sigdigits = 2 )
161
-
162
- # Test that set_result! sets the same value as get_result.
163
- @test result == result_copy
164
-
165
- ref_result = similar (result)
166
- temp_value_fields = map (similar, get_temp_value_fields ())
167
-
168
- result_arrays = MatrixFields. field2arrays (result)
169
- ref_result_arrays = MatrixFields. field2arrays (ref_result)
170
- inputs_arrays = map (MatrixFields. field2arrays, input_fields)
171
- temp_values_arrays = map (MatrixFields. field2arrays, temp_value_fields)
172
-
173
- function call_ref_set_result! ()
174
- for arrays in
175
- zip (ref_result_arrays, inputs_arrays... , temp_values_arrays... )
176
- ref_set_result! (arrays... )
177
- end
127
+ @test_opt ignored_modules = cuda_frames materialize! (
128
+ ref_result,
129
+ ref_set_result,
130
+ )
131
+ using_cuda ||
132
+ @test (@allocated materialize! (ref_result, ref_set_result)) == 0
178
133
end
179
-
180
- ref_time = @benchmark call_ref_set_result! ()
181
- ref_time_rounded = round (ref_time; sigdigits = 2 )
182
- time_ratio = time / ref_time
183
- time_ratio_rounded = round (time_ratio; sigdigits = 2 )
184
- max_error = compute_max_error (result_arrays, ref_result_arrays)
185
- max_eps_error = ceil (Int, max_error / eps (typeof (max_error)))
186
-
187
- @info " $test_name :\n\t Time Ratio = $time_ratio_rounded ($time_rounded \
188
- s vs. $ref_time_rounded s for reference)\n\t Maximum Error = \
189
- $max_eps_error eps"
190
-
191
- # Test that set_result! is performant and correct when compared against
192
- # ref_set_result!.
193
- @test time / ref_time <= time_ratio_limit
194
- @test max_eps_error <= max_eps_error_limit
195
-
196
- # Test get_result and set_result! for type instabilities, and test
197
- # set_result! for allocations. Ignore the type instabilities in CUDA and
198
- # the allocations they incur.
199
- @test_opt ignored_modules = cuda_frames get_result ()
200
- @test_opt ignored_modules = cuda_frames set_result! (result)
201
- using_cuda || @test (@allocated set_result! (result)) == 0
202
-
203
- # Test ref_set_result! for type instabilities and allocations to ensure
204
- # that the performance comparison is fair.
205
- @test_opt ignored_modules = cuda_frames call_ref_set_result! ()
206
- using_cuda || @test (@allocated call_ref_set_result! ()) == 0
207
134
end
208
135
end
209
136
0 commit comments