Skip to content

Commit 57b95bc

Browse files
committed
coll/tuned: Revert RSB and RS default algorithms
The reduce scatter block and reduce scatter algorithms were hitting correctness issues in non-commutative strided tests. We will revert to the original default algorithms for those two collectives (basic linear and non-overlapping, respectively) in the non-commutative op case. See #8010. Signed-off-by: William Zhang <wilzhang@amazon.com>
1 parent eefaadf commit 57b95bc

File tree

1 file changed

+6
-130
lines changed

1 file changed

+6
-130
lines changed

ompi/mca/coll/tuned/coll_tuned_decision_fixed.c

Lines changed: 6 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -883,64 +883,11 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( const void *sbuf, void *rbuf
883883
* {3, "ring"},
884884
* {4, "butterfly"},
885885
*
886-
* Recursive halving and ring do not support non commutative ops.
886+
* Non commutative algorithm capability needs re-investigation.
887+
* Defaulting to non overlapping for non commutative ops.
887888
*/
888889
if (!ompi_op_is_commute(op)) {
889-
if (communicator_size < 4) {
890-
if (total_dsize < 262144) {
891-
alg = 4;
892-
} else {
893-
alg = 1;
894-
}
895-
} else if (communicator_size < 8) {
896-
if (total_dsize < 16) {
897-
alg = 1;
898-
} else {
899-
alg = 4;
900-
}
901-
} else if (communicator_size < 16) {
902-
if (total_dsize < 32) {
903-
alg = 1;
904-
} else {
905-
alg = 4;
906-
}
907-
} else if (communicator_size < 32) {
908-
if (total_dsize < 64) {
909-
alg = 1;
910-
} else {
911-
alg = 4;
912-
}
913-
} else if (communicator_size < 64) {
914-
if (total_dsize < 128) {
915-
alg = 1;
916-
} else {
917-
alg = 4;
918-
}
919-
} else if (communicator_size < 128) {
920-
if (total_dsize < 256) {
921-
alg = 1;
922-
} else {
923-
alg = 4;
924-
}
925-
} else if (communicator_size < 1024) {
926-
if (total_dsize < 512) {
927-
alg = 1;
928-
} else {
929-
alg = 4;
930-
}
931-
} else if (communicator_size < 2048) {
932-
if (total_dsize < 1024) {
933-
alg = 1;
934-
} else {
935-
alg = 4;
936-
}
937-
} else {
938-
if (total_dsize < 2048) {
939-
alg = 1;
940-
} else {
941-
alg = 4;
942-
}
943-
}
890+
alg = 1;
944891
} else {
945892
if (communicator_size < 4) {
946893
if (total_dsize < 65536) {
@@ -1082,82 +1029,11 @@ int ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed(const void *sbuf, void
10821029
* {3, "recursive_halving"},
10831030
* {4, "butterfly"},
10841031
*
1085-
* Only recursive halving does not support non commutative ops.
1032+
* Non commutative algorithm capability needs re-investigation.
1033+
* Defaulting to basic linear for non commutative ops.
10861034
*/
10871035
if( !ompi_op_is_commute(op) ) {
1088-
if (communicator_size < 4) {
1089-
if (total_dsize < 4) {
1090-
alg = 2;
1091-
} else if (total_dsize < 131072) {
1092-
alg = 4;
1093-
} else {
1094-
alg = 1;
1095-
}
1096-
} else if (communicator_size < 8) {
1097-
if (total_dsize < 4) {
1098-
alg = 1;
1099-
} else if (total_dsize < 32) {
1100-
alg = 2;
1101-
} else if (total_dsize < 1048576) {
1102-
alg = 4;
1103-
} else {
1104-
alg = 1;
1105-
}
1106-
} else if (communicator_size < 16) {
1107-
if (total_dsize < 4) {
1108-
alg = 1;
1109-
} else if (total_dsize < 524288) {
1110-
alg = 4;
1111-
} else if (total_dsize < 4194304) {
1112-
alg = 1;
1113-
} else {
1114-
alg = 4;
1115-
}
1116-
} else if (communicator_size < 32) {
1117-
if (total_dsize < 128) {
1118-
alg = 1;
1119-
} else if (total_dsize < 262144) {
1120-
alg = 4;
1121-
} else if (total_dsize < 2097152) {
1122-
alg = 1;
1123-
} else {
1124-
alg = 4;
1125-
}
1126-
} else if (communicator_size < 64) {
1127-
if (total_dsize < 64) {
1128-
alg = 1;
1129-
} else if (total_dsize < 65536) {
1130-
alg = 4;
1131-
} else if (total_dsize < 1048576) {
1132-
alg = 1;
1133-
} else {
1134-
alg = 4;
1135-
}
1136-
} else if (communicator_size < 128) {
1137-
if (total_dsize < 4) {
1138-
alg = 4;
1139-
} else if (total_dsize < 64) {
1140-
alg = 1;
1141-
} else if (total_dsize < 131072) {
1142-
alg = 4;
1143-
} else if (total_dsize < 524288) {
1144-
alg = 1;
1145-
} else {
1146-
alg = 4;
1147-
}
1148-
} else {
1149-
if (total_dsize < 4) {
1150-
alg = 4;
1151-
} else if (total_dsize < 16) {
1152-
alg = 1;
1153-
} else if (total_dsize < 65536) {
1154-
alg = 4;
1155-
} else if (total_dsize < 262144) {
1156-
alg = 1;
1157-
} else {
1158-
alg = 4;
1159-
}
1160-
}
1036+
alg = 1;
11611037
} else {
11621038
if (communicator_size < 4) {
11631039
if (total_dsize < 4) {

0 commit comments

Comments
 (0)