1
- ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
2
- ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
1
+ ! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s --check-prefix=CPU
2
+ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s --check-prefix=CPU
3
+
4
+ ! RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -o - %s 2>&1 | FileCheck %s --check-prefix=GPU
5
+ ! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -o - %s 2>&1 | FileCheck %s --check-prefix=GPU
3
6
4
7
program reduce
5
8
integer , dimension (3 ) :: i = 0
@@ -13,81 +16,88 @@ program reduce
13
16
print * ,i
14
17
end program
15
18
16
- ! CHECK -LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> alloc {
17
- ! CHECK : %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
18
- ! CHECK : omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
19
- ! CHECK -LABEL: } init {
20
- ! CHECK : ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[ALLOC:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
21
- ! CHECK : %[[VAL_2:.*]] = arith.constant 0 : i32
22
- ! CHECK : %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
23
- ! CHECK : %[[VAL_4:.*]] = arith.constant 3 : index
24
- ! CHECK : %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
25
- ! CHECK : %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""}
26
- ! CHECK : %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>,
27
- ! CHECK : %[[TRUE:.*]] = arith.constant true
19
+ ! CPU -LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> alloc {
20
+ ! CPU : %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
21
+ ! CPU : omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
22
+ ! CPU -LABEL: } init {
23
+ ! CPU : ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[ALLOC:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
24
+ ! CPU : %[[VAL_2:.*]] = arith.constant 0 : i32
25
+ ! CPU : %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
26
+ ! CPU : %[[VAL_4:.*]] = arith.constant 3 : index
27
+ ! CPU : %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
28
+ ! CPU : %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""}
29
+ ! CPU : %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>,
30
+ ! CPU : %[[TRUE:.*]] = arith.constant true
28
31
! fir.shape<1>) -> (!fir.heap<!fir.array<3xi32>>, !fir.heap<!fir.array<3xi32>>)
29
- ! CHECK: %[[C0:.*]] = arith.constant 0 : index
30
- ! CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[VAL_3]], %[[C0]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
31
- ! CHECK: %[[SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
32
- ! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[SHIFT]]) : (!fir.heap<!fir.array<3xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<3xi32>>
33
- ! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
34
- ! CHECK: fir.store %[[VAL_7]] to %[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
35
- ! CHECK: omp.yield(%[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
36
- ! CHECK: } combiner {
37
- ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
38
- ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
39
- ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
40
- ! CHECK: %[[C1:.*]] = arith.constant 1 : index
41
- ! CHECK: %[[C3:.*]] = arith.constant 3 : index
42
- ! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[C1]], %[[C3]] : (index, index) -> !fir.shapeshift<1>
43
- ! CHECK: %[[C1_0:.*]] = arith.constant 1 : index
44
- ! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[C1_0]] to %[[C3]] step %[[C1_0]] unordered {
45
- ! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
46
- ! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
47
- ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
48
- ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
49
- ! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
50
- ! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
51
- ! CHECK: }
52
- ! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
53
- ! CHECK: } cleanup {
54
- ! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
55
- ! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
56
- ! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
57
- ! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> i64
58
- ! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
59
- ! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
60
- ! CHECK: fir.if %[[VAL_5]] {
61
- ! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
62
- ! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<3xi32>>
63
- ! CHECK: }
64
- ! CHECK: omp.yield
65
- ! CHECK: }
32
+ ! CPU: %[[C0:.*]] = arith.constant 0 : index
33
+ ! CPU: %[[DIMS:.*]]:3 = fir.box_dims %[[VAL_3]], %[[C0]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
34
+ ! CPU: %[[SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
35
+ ! CPU: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[SHIFT]]) : (!fir.heap<!fir.array<3xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<3xi32>>
36
+ ! CPU: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
37
+ ! CPU: fir.store %[[VAL_7]] to %[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
38
+ ! CPU: omp.yield(%[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
39
+ ! CPU: } combiner {
40
+ ! CPU: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
41
+ ! CPU: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
42
+ ! CPU: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
43
+ ! CPU: %[[C1:.*]] = arith.constant 1 : index
44
+ ! CPU: %[[C3:.*]] = arith.constant 3 : index
45
+ ! CPU: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[C1]], %[[C3]] : (index, index) -> !fir.shapeshift<1>
46
+ ! CPU: %[[C1_0:.*]] = arith.constant 1 : index
47
+ ! CPU: fir.do_loop %[[VAL_8:.*]] = %[[C1_0]] to %[[C3]] step %[[C1_0]] unordered {
48
+ ! CPU: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
49
+ ! CPU: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
50
+ ! CPU: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
51
+ ! CPU: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
52
+ ! CPU: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
53
+ ! CPU: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
54
+ ! CPU: }
55
+ ! CPU: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
56
+ ! CPU: } cleanup {
57
+ ! CPU: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
58
+ ! CPU: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
59
+ ! CPU: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
60
+ ! CPU: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> i64
61
+ ! CPU: %[[VAL_4:.*]] = arith.constant 0 : i64
62
+ ! CPU: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
63
+ ! CPU: fir.if %[[VAL_5]] {
64
+ ! CPU: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
65
+ ! CPU: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<3xi32>>
66
+ ! CPU: }
67
+ ! CPU: omp.yield
68
+ ! CPU: }
69
+
70
+ ! CPU-LABEL: func.func @_QQmain()
71
+ ! CPU: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3xi32>>
72
+ ! CPU: %[[VAL_1:.*]] = arith.constant 3 : index
73
+ ! CPU: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
74
+ ! CPU: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
75
+ ! CPU: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0(%[[VAL_2]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
76
+ ! CPU: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
77
+ ! CPU: fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
78
+ ! CPU: omp.parallel reduction(byref @add_reduction_byref_box_3xi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<3xi32>>>) {
79
+ ! CPU: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3xi32>>>, !fir.ref<!fir.box<!fir.array<3xi32>>>)
80
+ ! CPU: %[[VAL_8:.*]] = arith.constant 1 : i32
81
+ ! CPU: %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
82
+ ! CPU: %[[VAL_10:.*]] = arith.constant 1 : index
83
+ ! CPU: %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_10]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
84
+ ! CPU: hlfir.assign %[[VAL_8]] to %[[VAL_11]] : i32, !fir.ref<i32>
85
+ ! CPU: %[[VAL_12:.*]] = arith.constant 2 : i32
86
+ ! CPU: %[[VAL_13:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
87
+ ! CPU: %[[VAL_14:.*]] = arith.constant 2 : index
88
+ ! CPU: %[[VAL_15:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_14]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
89
+ ! CPU: hlfir.assign %[[VAL_12]] to %[[VAL_15]] : i32, !fir.ref<i32>
90
+ ! CPU: %[[VAL_16:.*]] = arith.constant 3 : i32
91
+ ! CPU: %[[VAL_17:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
92
+ ! CPU: %[[VAL_18:.*]] = arith.constant 3 : index
93
+ ! CPU: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
94
+ ! CPU: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
95
+ ! CPU: omp.terminator
96
+ ! CPU: }
66
97
67
- ! CHECK-LABEL: func.func @_QQmain()
68
- ! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3xi32>>
69
- ! CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
70
- ! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
71
- ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
72
- ! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0(%[[VAL_2]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
73
- ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
74
- ! CHECK: fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
75
- ! CHECK: omp.parallel reduction(byref @add_reduction_byref_box_3xi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<3xi32>>>) {
76
- ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3xi32>>>, !fir.ref<!fir.box<!fir.array<3xi32>>>)
77
- ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
78
- ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
79
- ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : index
80
- ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_10]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
81
- ! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_11]] : i32, !fir.ref<i32>
82
- ! CHECK: %[[VAL_12:.*]] = arith.constant 2 : i32
83
- ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
84
- ! CHECK: %[[VAL_14:.*]] = arith.constant 2 : index
85
- ! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_14]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
86
- ! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_15]] : i32, !fir.ref<i32>
87
- ! CHECK: %[[VAL_16:.*]] = arith.constant 3 : i32
88
- ! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
89
- ! CHECK: %[[VAL_18:.*]] = arith.constant 3 : index
90
- ! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
91
- ! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
92
- ! CHECK: omp.terminator
93
- ! CHECK: }
98
+ ! GPU: omp.declare_reduction {{.*}} alloc {
99
+ ! GPU: } init {
100
+ ! GPU-NOT: fir.allocmem {{.*}} {bindc_name = ".tmp", {{.*}}}
101
+ ! GPU: fir.alloca {{.*}} {bindc_name = ".tmp"}
102
+ ! GPU: } combiner {
103
+ ! GPU: }
0 commit comments