@@ -136,24 +136,25 @@ _Generic((*(out)), \
136
136
} \
137
137
}
138
138
#elif defined(GENERATE_SVE_CODE )
139
- #define OP_AARCH64_FUNC (name , type_name , type_size , type_cnt , type , op ) \
139
+ #define OP_AARCH64_FUNC (name , type_name , type_size , type_cnt , type , op ) \
140
+ SVE_ATTR \
140
141
static void OP_CONCAT (ompi_op_aarch64_2buff_ ##name ##_##type##type_size##_t, APPEND) \
141
- (const void *_in, void *_out, int *count, \
142
- struct ompi_datatype_t **dtype, \
143
- struct ompi_op_base_module_1_0_0_t *module) \
144
- { \
145
- const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
146
- const int cnt = *count; \
147
- type##type_size##_t *in = (type##type_size##_t *) _in, \
148
- *out = (type##type_size##_t *) _out; \
149
- OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
150
- for (int idx=0; idx < cnt; idx += types_per_step) { \
151
- svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
152
- vsrc = svld1(pred, &in[idx]); \
153
- vdst = svld1(pred, &out[idx]); \
154
- vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
155
- OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
156
- } \
142
+ (const void *_in, void *_out, int *count, \
143
+ struct ompi_datatype_t **dtype, \
144
+ struct ompi_op_base_module_1_0_0_t *module) \
145
+ { \
146
+ const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
147
+ const int cnt = *count; \
148
+ type##type_size##_t *in = (type##type_size##_t *) _in, \
149
+ *out = (type##type_size##_t *) _out; \
150
+ OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
151
+ for (int idx=0; idx < cnt; idx += types_per_step) { \
152
+ svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
153
+ vsrc = svld1(pred, &in[idx]); \
154
+ vdst = svld1(pred, &out[idx]); \
155
+ vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
156
+ OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
157
+ } \
157
158
}
158
159
#endif
159
160
@@ -302,25 +303,26 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
302
303
} \
303
304
}
304
305
#elif defined(GENERATE_SVE_CODE )
305
- #define OP_AARCH64_FUNC_3BUFF (name , type_name , type_size , type_cnt , type , op ) \
306
- static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
307
- (const void *_in1, const void *_in2, void *_out, int *count, \
308
- struct ompi_datatype_t **dtype, \
309
- struct ompi_op_base_module_1_0_0_t *module) \
310
- { \
311
- const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
312
- type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
313
- *in2 = (type##type_size##_t *) _in2, \
314
- *out = (type##type_size##_t *) _out; \
315
- const int cnt = *count; \
316
- OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
317
- for (int idx=0; idx < cnt; idx += types_per_step) { \
318
- svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
319
- vsrc = svld1(pred, &in1[idx]); \
320
- vdst = svld1(pred, &in2[idx]); \
321
- vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
322
- OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
323
- } \
306
+ #define OP_AARCH64_FUNC_3BUFF (name , type_name , type_size , type_cnt , type , op ) \
307
+ SVE_ATTR \
308
+ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
309
+ (const void *_in1, const void *_in2, void *_out, int *count, \
310
+ struct ompi_datatype_t **dtype, \
311
+ struct ompi_op_base_module_1_0_0_t *module) \
312
+ { \
313
+ const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
314
+ type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
315
+ *in2 = (type##type_size##_t *) _in2, \
316
+ *out = (type##type_size##_t *) _out; \
317
+ const int cnt = *count; \
318
+ OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
319
+ for (int idx=0; idx < cnt; idx += types_per_step) { \
320
+ svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
321
+ vsrc = svld1(pred, &in1[idx]); \
322
+ vdst = svld1(pred, &in2[idx]); \
323
+ vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
324
+ OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
325
+ } \
324
326
}
325
327
#endif /* defined(GENERATE_SVE_CODE) */
326
328
0 commit comments