Skip to content

Commit 73c8609

Browse files
committed
Some code changes, and more metrics.
1 parent 7e5e6b2 commit 73c8609

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

examples/directives/matrix_multiply_c_openacc.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,17 +9,20 @@
99
process_directives
1010
)
1111

12+
N = 4096
13+
1214
code = """
1315
#define N 4096
1416
1517
void matrix_multiply(float *A, float *B, float *C) {
1618
#pragma tuner start mm A(float*:NN) B(float*:NN) C(float*:NN)
1719
float temp_sum = 0.0f;
1820
#pragma acc parallel vector_length(nthreads)
19-
#pragma acc loop collapse(2) reduction(+:temp_sum)
21+
#pragma acc loop gang collapse(2)
2022
for ( int i = 0; i < N; i++) {
2123
for ( int j = 0; j < N; j++ ) {
2224
temp_sum = 0.0f;
25+
#pragma acc loop vector reduction(+:temp_sum)
2326
for ( int k = 0; k < N; k++ ) {
2427
temp_sum += A[(i * N) + k] * B[(k * N) + j];
2528
}
@@ -32,13 +35,15 @@
3235

3336
# Extract tunable directive
3437
app = Code(OpenACC(), Cxx())
35-
dims = {"NN": 4096*4096}
38+
dims = {"NN": N**2}
3639
kernel_string, kernel_args = process_directives(app, code, user_dimensions=dims)
3740

3841
tune_params = dict()
3942
tune_params["nthreads"] = [32 * i for i in range(1, 33)]
4043
metrics = dict()
41-
metrics["GB/s"] = lambda x: ((4096 * 4096 * 4096 * 2 * 4) + (4096 * 4096 * 4)) / (x["time"] / 10**3) / 10**9
44+
metrics["time_s"] = lambda x: x["time"] / 10**3
45+
metrics["GB/s"] = lambda x: ((N**3 * 2 * 4) + (N**2 * 4)) / x["time_s"] / 10**9
46+
metrics["GFLOP/s"] = lambda x: (N**3 * 3) / x["time_s"] / 10**9
4247

4348
tune_kernel(
4449
"mm",

0 commit comments

Comments
 (0)