Skip to content

Commit 1215776

Browse files
committed
Change the loop counter direction in the base MPI_Op functions (count down from *count to zero while still walking the buffers forward) to allow for more optimizations, as discussed in #9717.
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent f33221f commit 1215776

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

ompi/mca/op/base/op_base_functions.c

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
int i; \
4646
type *a = (type *) in; \
4747
type *b = (type *) out; \
48-
for (i = 0; i < *count; ++i) { \
48+
for (i = *count; i > 0; i--) { \
4949
*(b++) op *(a++); \
5050
} \
5151
}
@@ -65,7 +65,7 @@
6565
int i; \
6666
type *a = (type *) in; \
6767
type *b = (type *) out; \
68-
for (i = 0; i < *count; ++i) { \
68+
for (i = *count; i > 0; i--) { \
6969
*(b) = current_func(*(b), *(a)); \
7070
++b; \
7171
++a; \
@@ -93,7 +93,7 @@
9393
int i; \
9494
ompi_op_predefined_##type_name##_t *a = (ompi_op_predefined_##type_name##_t*) in; \
9595
ompi_op_predefined_##type_name##_t *b = (ompi_op_predefined_##type_name##_t*) out; \
96-
for (i = 0; i < *count; ++i, ++a, ++b) { \
96+
for (i = *count; i > 0; i--, ++a, ++b) { \
9797
if (a->v op b->v) { \
9898
b->v = a->v; \
9999
b->k = a->k; \
@@ -117,7 +117,7 @@
117117
int i; \
118118
type (*a)[2] = (type (*)[2]) in; \
119119
type (*b)[2] = (type (*)[2]) out; \
120-
for (i = 0; i < *count; ++i, ++a, ++b) { \
120+
for (i = *count; i > 0; i--, ++a, ++b) { \
121121
(*b)[0] += (*a)[0]; \
122122
(*b)[1] += (*a)[1]; \
123123
} \
@@ -138,7 +138,7 @@
138138
type (*a)[2] = (type (*)[2]) in; \
139139
type (*b)[2] = (type (*)[2]) out; \
140140
type c[2]; \
141-
for (i = 0; i < *count; ++i, ++a, ++b) { \
141+
for (i = *count; i > 0; i--, ++a, ++b) { \
142142
c[0] = (*a)[0] * (*b)[0] - (*a)[1] * (*b)[1]; \
143143
c[1] = (*a)[0] * (*b)[1] + (*a)[1] * (*b)[0]; \
144144
(*b)[0] = c[0]; \
@@ -693,7 +693,7 @@ LOC_FUNC(minloc, long_double_int, <)
693693
type *a1 = (type *) in1; \
694694
type *a2 = (type *) in2; \
695695
type *b = (type *) out; \
696-
for (i = 0; i < *count; ++i) { \
696+
for (i = *count; i > 0; i--) { \
697697
*(b++) = *(a1++) op *(a2++); \
698698
} \
699699
}
@@ -715,7 +715,7 @@ LOC_FUNC(minloc, long_double_int, <)
715715
type *a1 = (type *) in1; \
716716
type *a2 = (type *) in2; \
717717
type *b = (type *) out; \
718-
for (i = 0; i < *count; ++i) { \
718+
for (i = *count; i > 0; i--) { \
719719
*(b) = current_func(*(a1), *(a2)); \
720720
++b; \
721721
++a1; \
@@ -748,7 +748,7 @@ LOC_FUNC(minloc, long_double_int, <)
748748
ompi_op_predefined_##type_name##_t *a1 = (ompi_op_predefined_##type_name##_t*) in1; \
749749
ompi_op_predefined_##type_name##_t *a2 = (ompi_op_predefined_##type_name##_t*) in2; \
750750
ompi_op_predefined_##type_name##_t *b = (ompi_op_predefined_##type_name##_t*) out; \
751-
for (i = 0; i < *count; ++i, ++a1, ++a2, ++b ) { \
751+
for (i = *count; i > 0; i--, ++a1, ++a2, ++b ) { \
752752
if (a1->v op a2->v) { \
753753
b->v = a1->v; \
754754
b->k = a1->k; \
@@ -778,7 +778,7 @@ LOC_FUNC(minloc, long_double_int, <)
778778
type (*a1)[2] = (type (*)[2]) in1; \
779779
type (*a2)[2] = (type (*)[2]) in2; \
780780
type (*b)[2] = (type (*)[2]) out; \
781-
for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) { \
781+
for (i = *count; i > 0; i--, ++a1, ++a2, ++b) { \
782782
(*b)[0] = (*a1)[0] + (*a2)[0]; \
783783
(*b)[1] = (*a1)[1] + (*a2)[1]; \
784784
} \
@@ -800,7 +800,7 @@ LOC_FUNC(minloc, long_double_int, <)
800800
type (*a1)[2] = (type (*)[2]) in1; \
801801
type (*a2)[2] = (type (*)[2]) in2; \
802802
type (*b)[2] = (type (*)[2]) out; \
803-
for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) { \
803+
for (i = *count; i > 0; i--, ++a1, ++a2, ++b) { \
804804
(*b)[0] = (*a1)[0] * (*a2)[0] - (*a1)[1] * (*a2)[1]; \
805805
(*b)[1] = (*a1)[0] * (*a2)[1] + (*a1)[1] * (*a2)[0]; \
806806
} \

0 commit comments

Comments
 (0)