Skip to content

Commit 619e2e3

Browse files
committed
apply SVE_ATTR macro in C source for conditional +sve targeting
- Ensures that SVE-specific attributes are only applied when OMPI_MCA_OP_SVE_EXTRA_FLAGS is set, avoiding illegal instructions on non-SVE builds.

Signed-off-by: Marco Vogel <marco.vogel@fernuni-hagen.de>
1 parent 717ec7b commit 619e2e3

File tree

2 files changed

+39
-37
lines changed

2 files changed

+39
-37
lines changed

ompi/mca/op/aarch64/op_aarch64_component.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static int mca_op_aarch64_component_close(void)
101101
/*
102102
* Register MCA params.
103103
*/
104-
static int mca_op_aarch64_component_register(void)
104+
SVE_ATTR static int mca_op_aarch64_component_register(void)
105105
{
106106

107107
mca_op_aarch64_component.hardware_available = 1; /* Check for Neon */

ompi/mca/op/aarch64/op_aarch64_functions.c

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -136,24 +136,25 @@ _Generic((*(out)), \
136136
} \
137137
}
138138
#elif defined(GENERATE_SVE_CODE)
139-
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
139+
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
140+
SVE_ATTR \
140141
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \
141-
(const void *_in, void *_out, int *count, \
142-
struct ompi_datatype_t **dtype, \
143-
struct ompi_op_base_module_1_0_0_t *module) \
144-
{ \
145-
const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
146-
const int cnt = *count; \
147-
type##type_size##_t *in = (type##type_size##_t *) _in, \
148-
*out = (type##type_size##_t *) _out; \
149-
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
150-
for (int idx=0; idx < cnt; idx += types_per_step) { \
151-
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
152-
vsrc = svld1(pred, &in[idx]); \
153-
vdst = svld1(pred, &out[idx]); \
154-
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
155-
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
156-
} \
142+
(const void *_in, void *_out, int *count, \
143+
struct ompi_datatype_t **dtype, \
144+
struct ompi_op_base_module_1_0_0_t *module) \
145+
{ \
146+
const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
147+
const int cnt = *count; \
148+
type##type_size##_t *in = (type##type_size##_t *) _in, \
149+
*out = (type##type_size##_t *) _out; \
150+
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
151+
for (int idx=0; idx < cnt; idx += types_per_step) { \
152+
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
153+
vsrc = svld1(pred, &in[idx]); \
154+
vdst = svld1(pred, &out[idx]); \
155+
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
156+
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
157+
} \
157158
}
158159
#endif
159160

@@ -302,25 +303,26 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
302303
} \
303304
}
304305
#elif defined(GENERATE_SVE_CODE)
305-
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
306-
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
307-
(const void *_in1, const void *_in2, void *_out, int *count, \
308-
struct ompi_datatype_t **dtype, \
309-
struct ompi_op_base_module_1_0_0_t *module) \
310-
{ \
311-
const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
312-
type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
313-
*in2 = (type##type_size##_t *) _in2, \
314-
*out = (type##type_size##_t *) _out; \
315-
const int cnt = *count; \
316-
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
317-
for (int idx=0; idx < cnt; idx += types_per_step) { \
318-
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
319-
vsrc = svld1(pred, &in1[idx]); \
320-
vdst = svld1(pred, &in2[idx]); \
321-
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
322-
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
323-
} \
306+
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
307+
SVE_ATTR \
308+
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
309+
(const void *_in1, const void *_in2, void *_out, int *count, \
310+
struct ompi_datatype_t **dtype, \
311+
struct ompi_op_base_module_1_0_0_t *module) \
312+
{ \
313+
const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
314+
type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
315+
*in2 = (type##type_size##_t *) _in2, \
316+
*out = (type##type_size##_t *) _out; \
317+
const int cnt = *count; \
318+
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
319+
for (int idx=0; idx < cnt; idx += types_per_step) { \
320+
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
321+
vsrc = svld1(pred, &in1[idx]); \
322+
vdst = svld1(pred, &in2[idx]); \
323+
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
324+
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
325+
} \
324326
}
325327
#endif /* defined(GENERATE_SVE_CODE) */
326328

0 commit comments

Comments
 (0)