Skip to content

Commit 7b9e74c

Browse files
committed
coll/HAN: Add support for XHC on the intra-comm
Signed-off-by: George Katevenis <gkatev@ics.forth.gr>
1 parent 7b59f8e commit 7b9e74c

File tree

4 files changed

+23
-9
lines changed

4 files changed

+23
-9
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
77
* Copyright (c) Amazon.com, Inc. or its affiliates.
88
* All rights reserved.
9+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
10+
* Laboratory, ICS Forth. All rights reserved.
911
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1012
* $COPYRIGHT$
1113
*
@@ -47,11 +49,11 @@
4749

4850
/*
4951
* Today:
50-
* . only 2 modules available for intranode (low) level
52+
* . 3 modules available for intranode (low) level
5153
* . only 2 modules available for internode (up) level
5254
*/
5355

54-
#define COLL_HAN_LOW_MODULES 2
56+
#define COLL_HAN_LOW_MODULES 3
5557
#define COLL_HAN_UP_MODULES 2
5658

5759
struct mca_coll_han_bcast_args_s {

ompi/mca/coll/han/coll_han_component.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
* reserved.
55
* Copyright (c) 2022 IBM Corporation. All rights reserved
66
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
7+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
8+
* Laboratory, ICS Forth. All rights reserved.
79
* $COPYRIGHT$
810
*
911
* Additional copyrights may follow
@@ -43,7 +45,8 @@ ompi_coll_han_components ompi_coll_han_available_components[COMPONENTS_COUNT] =
4345
{ TUNED, "tuned" },
4446
{ SM, "sm" }, /* this should not be used, the collective component is gone */
4547
{ ADAPT, "adapt" },
46-
{ HAN, "han" }
48+
{ HAN, "han" },
49+
{ XHC, "xhc" }
4750
};
4851

4952
/*
@@ -287,7 +290,7 @@ static int han_register(void)
287290

288291
cs->han_bcast_low_module = 0;
289292
(void) mca_coll_han_query_module_from_mca(c, "bcast_low_module",
290-
"low level module for bcast, currently only 0 for tuned",
293+
"low level module for bcast, 0 tuned, 2 xhc",
291294
OPAL_INFO_LVL_9,
292295
&cs->han_bcast_low_module,
293296
&cs->han_op_module_name.bcast.han_op_low_module_name);
@@ -307,7 +310,7 @@ static int han_register(void)
307310

308311
cs->han_reduce_low_module = 0;
309312
(void) mca_coll_han_query_module_from_mca(c, "reduce_low_module",
310-
"low level module for allreduce, currently only 0 tuned",
313+
"low level module for reduce, 0 tuned, 2 xhc",
311314
OPAL_INFO_LVL_9, &cs->han_reduce_low_module,
312315
&cs->han_op_module_name.reduce.han_op_low_module_name);
313316

@@ -326,7 +329,7 @@ static int han_register(void)
326329

327330
cs->han_allreduce_low_module = 0;
328331
(void) mca_coll_han_query_module_from_mca(c, "allreduce_low_module",
329-
"low level module for allreduce, currently only 0 tuned",
332+
"low level module for allreduce, 0 tuned, 2 xhc",
330333
OPAL_INFO_LVL_9, &cs->han_allreduce_low_module,
331334
&cs->han_op_module_name.allreduce.han_op_low_module_name);
332335

@@ -338,7 +341,7 @@ static int han_register(void)
338341

339342
cs->han_allgather_low_module = 0;
340343
(void) mca_coll_han_query_module_from_mca(c, "allgather_low_module",
341-
"low level module for allgather, currently only 0 tuned",
344+
"low level module for allgather, 0 tuned, 2 xhc",
342345
OPAL_INFO_LVL_9, &cs->han_allgather_low_module,
343346
&cs->han_op_module_name.allgather.han_op_low_module_name);
344347

@@ -350,7 +353,7 @@ static int han_register(void)
350353

351354
cs->han_gather_low_module = 0;
352355
(void) mca_coll_han_query_module_from_mca(c, "gather_low_module",
353-
"low level module for gather, currently only 0 tuned",
356+
"low level module for gather, 0 tuned, 2 xhc",
354357
OPAL_INFO_LVL_9, &cs->han_gather_low_module,
355358
&cs->han_op_module_name.gather.han_op_low_module_name);
356359

@@ -374,7 +377,7 @@ static int han_register(void)
374377

375378
cs->han_scatter_low_module = 0;
376379
(void) mca_coll_han_query_module_from_mca(c, "scatter_low_module",
377-
"low level module for scatter, currently only 0 tuned",
380+
"low level module for scatter, 0 tuned, 2 xhc",
378381
OPAL_INFO_LVL_9, &cs->han_scatter_low_module,
379382
&cs->han_op_module_name.scatter.han_op_low_module_name);
380383

ompi/mca/coll/han/coll_han_dynamic.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
* reserved.
66
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
77
* Copyright (c) 2022 IBM Corporation. All rights reserved
8+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
9+
* Laboratory, ICS Forth. All rights reserved.
810
*
911
* $COPYRIGHT$
1012
*
@@ -105,6 +107,7 @@ typedef enum COMPONENTS {
105107
SM,
106108
ADAPT,
107109
HAN,
110+
XHC,
108111
COMPONENTS_COUNT
109112
} COMPONENT_T;
110113

ompi/mca/coll/han/coll_han_subcomms.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* of Tennessee Research Foundation. All rights
44
* reserved.
55
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
6+
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
7+
* Laboratory, ICS Forth. All rights reserved.
68
*
79
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
810
* $COPYRIGHT$
@@ -314,6 +316,10 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
314316
&comm_info, &(low_comms[1]));
315317
assert(OMPI_COMM_IS_DISJOINT_SET(low_comms[1]) && !OMPI_COMM_IS_DISJOINT(low_comms[1]));
316318

319+
opal_info_set(&comm_info, "ompi_comm_coll_preference", "xhc,^han");
320+
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
321+
&comm_info, &(low_comms[2]));
322+
317323
/*
318324
* Upgrade libnbc module priority to set up up_comms[0] with libnbc module
319325
* This sub-communicator contains one process per node: processes with the

0 commit comments

Comments
 (0)