Skip to content

Commit 331571c

Browse files
Merge pull request #1885 from CliMA/ck/refactor_operator_matrices
Use LazyBroadcast in operator matrices tests
2 parents e239ab3 + 970733a commit 331571c

File tree

3 files changed

+147
-189
lines changed

3 files changed

+147
-189
lines changed

test/MatrixFields/matrix_field_test_utils.jl

Lines changed: 20 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -65,34 +65,34 @@ const invalid_ir_error = using_cuda ? cuda_mod.InvalidIRError : ErrorException
6565
# skipped.
6666
function test_field_broadcast(;
6767
test_name,
68-
get_result::F1,
69-
set_result!::F2,
70-
ref_set_result!::F3 = nothing,
68+
get_result,
69+
set_result,
70+
ref_set_result = nothing,
7171
time_ratio_limit = 10,
7272
max_eps_error_limit = 10,
7373
test_broken_with_cuda = false,
74-
) where {F1, F2, F3}
74+
)
7575
@testset "$test_name" begin
7676
if test_broken_with_cuda && using_cuda
77-
@test_throws invalid_ir_error get_result()
77+
@test_throws invalid_ir_error materialize(get_result)
7878
@warn "$test_name:\n\tCUDA.InvalidIRError"
7979
return
8080
end
8181

82-
result = get_result()
82+
result = materialize(get_result)
8383
result_copy = copy(result)
84-
time = @benchmark set_result!(result)
84+
time = @benchmark materialize!(result, set_result)
8585
time_rounded = round(time; sigdigits = 2)
8686

8787
# Test that set_result! sets the same value as get_result.
8888
@test result == result_copy
8989

90-
if isnothing(ref_set_result!)
90+
if isnothing(ref_set_result)
9191
@info "$test_name:\n\tTime = $time_rounded s (reference \
9292
implementation unavailable)"
9393
else
9494
ref_result = similar(result)
95-
ref_time = @benchmark ref_set_result!(ref_result)
95+
ref_time = @benchmark materialize!(ref_result, ref_set_result)
9696
ref_time_rounded = round(ref_time; sigdigits = 2)
9797
time_ratio = time / ref_time
9898
time_ratio_rounded = round(time_ratio; sigdigits = 2)
@@ -109,101 +109,28 @@ function test_field_broadcast(;
109109
\n\tMaximum Error = $max_eps_error eps"
110110

111111
# Test that set_result! is performant and correct when compared
112-
# against ref_set_result!.
112+
# against ref_set_result.
113113
@test time / ref_time <= time_ratio_limit
114114
@test max_eps_error <= max_eps_error_limit
115115
end
116116

117117
# Test get_result and set_result! for type instabilities, and test
118118
# set_result! for allocations. Ignore the type instabilities in CUDA and
119119
# the allocations they incur.
120-
@test_opt ignored_modules = cuda_frames get_result()
121-
@test_opt ignored_modules = cuda_frames set_result!(result)
122-
using_cuda || @test (@allocated set_result!(result)) == 0
120+
@test_opt ignored_modules = cuda_frames materialize(get_result)
121+
@test_opt ignored_modules = cuda_frames materialize!(result, set_result)
122+
using_cuda || @test (@allocated materialize!(result, set_result)) == 0
123123

124-
if !isnothing(ref_set_result!)
124+
if !isnothing(ref_set_result)
125125
# Test ref_set_result! for type instabilities and allocations to
126126
# ensure that the performance comparison is fair.
127-
@test_opt ignored_modules = cuda_frames ref_set_result!(ref_result)
128-
using_cuda || @test (@allocated ref_set_result!(ref_result)) == 0
129-
end
130-
end
131-
end
132-
133-
# Test the allocating and non-allocating versions of a field broadcast against
134-
# a reference array-based non-allocating implementation. Ensure that they are
135-
# performant, correct, and type-stable, and print some useful information. In
136-
# order for the input arrays and temporary scratch arrays used by the reference
137-
# implementation to be generated automatically, the corresponding fields must be
138-
# passed to this function.
139-
function test_field_broadcast_against_array_reference(;
140-
test_name,
141-
get_result::F1,
142-
set_result!::F2,
143-
input_fields,
144-
get_temp_value_fields = () -> (),
145-
ref_set_result!::F3,
146-
time_ratio_limit = 10,
147-
max_eps_error_limit = 10,
148-
test_broken_with_cuda = false,
149-
) where {F1, F2, F3}
150-
@testset "$test_name" begin
151-
if test_broken_with_cuda && using_cuda
152-
@test_throws invalid_ir_error get_result()
153-
@warn "$test_name:\n\tCUDA.InvalidIRError"
154-
return
155-
end
156-
157-
result = get_result()
158-
result_copy = copy(result)
159-
time = @benchmark set_result!(result)
160-
time_rounded = round(time; sigdigits = 2)
161-
162-
# Test that set_result! sets the same value as get_result.
163-
@test result == result_copy
164-
165-
ref_result = similar(result)
166-
temp_value_fields = map(similar, get_temp_value_fields())
167-
168-
result_arrays = MatrixFields.field2arrays(result)
169-
ref_result_arrays = MatrixFields.field2arrays(ref_result)
170-
inputs_arrays = map(MatrixFields.field2arrays, input_fields)
171-
temp_values_arrays = map(MatrixFields.field2arrays, temp_value_fields)
172-
173-
function call_ref_set_result!()
174-
for arrays in
175-
zip(ref_result_arrays, inputs_arrays..., temp_values_arrays...)
176-
ref_set_result!(arrays...)
177-
end
127+
@test_opt ignored_modules = cuda_frames materialize!(
128+
ref_result,
129+
ref_set_result,
130+
)
131+
using_cuda ||
132+
@test (@allocated materialize!(ref_result, ref_set_result)) == 0
178133
end
179-
180-
ref_time = @benchmark call_ref_set_result!()
181-
ref_time_rounded = round(ref_time; sigdigits = 2)
182-
time_ratio = time / ref_time
183-
time_ratio_rounded = round(time_ratio; sigdigits = 2)
184-
max_error = compute_max_error(result_arrays, ref_result_arrays)
185-
max_eps_error = ceil(Int, max_error / eps(typeof(max_error)))
186-
187-
@info "$test_name:\n\tTime Ratio = $time_ratio_rounded ($time_rounded \
188-
s vs. $ref_time_rounded s for reference)\n\tMaximum Error = \
189-
$max_eps_error eps"
190-
191-
# Test that set_result! is performant and correct when compared against
192-
# ref_set_result!.
193-
@test time / ref_time <= time_ratio_limit
194-
@test max_eps_error <= max_eps_error_limit
195-
196-
# Test get_result and set_result! for type instabilities, and test
197-
# set_result! for allocations. Ignore the type instabilities in CUDA and
198-
# the allocations they incur.
199-
@test_opt ignored_modules = cuda_frames get_result()
200-
@test_opt ignored_modules = cuda_frames set_result!(result)
201-
using_cuda || @test (@allocated set_result!(result)) == 0
202-
203-
# Test ref_set_result! for type instabilities and allocations to ensure
204-
# that the performance comparison is fair.
205-
@test_opt ignored_modules = cuda_frames call_ref_set_result!()
206-
using_cuda || @test (@allocated call_ref_set_result!()) == 0
207134
end
208135
end
209136

0 commit comments

Comments
 (0)