14
14
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
15
15
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
16
16
* reserved.
17
- * Copyright (c) 2012-2023 NVIDIA Corporation. All rights reserved.
17
+ * Copyright (c) 2012-2024 NVIDIA Corporation. All rights reserved.
18
18
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
19
19
* Copyright (c) 2014-2017 Research Organization for Information Science
20
20
* and Technology (RIST). All rights reserved.
@@ -216,56 +216,52 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
216
216
{
217
217
size_t length , length_payload ;
218
218
sm_fifo_t * my_fifos ;
219
- int my_mem_node , num_mem_nodes , i , rc ;
220
219
mca_common_sm_mpool_resources_t * res = NULL ;
221
- mca_btl_smcuda_component_t * m = & mca_btl_smcuda_component ;
222
220
char * loc , * mynuma ;
223
221
opal_process_name_t wildcard_rank ;
222
+ int rc ;
224
223
225
224
/* Assume we don't have hwloc support and fill in dummy info */
226
- mca_btl_smcuda_component .mem_node = my_mem_node = 0 ;
227
- mca_btl_smcuda_component .num_mem_nodes = num_mem_nodes = 1 ;
225
+ mca_btl_smcuda_component .mem_node = -1 ;
226
+ mca_btl_smcuda_component .num_mem_nodes = 1 ;
228
227
229
228
/* see if we were given a topology signature */
230
229
wildcard_rank .jobid = OPAL_PROC_MY_NAME .jobid ;
231
230
wildcard_rank .vpid = OPAL_VPID_WILDCARD ;
232
231
OPAL_MODEX_RECV_VALUE_OPTIONAL (rc , PMIX_TOPOLOGY_SIGNATURE , & wildcard_rank , & loc , PMIX_STRING );
233
232
if (OPAL_SUCCESS == rc ) {
234
233
/* the number of NUMA nodes is right at the front */
235
- mca_btl_smcuda_component .num_mem_nodes = num_mem_nodes = strtoul (loc , NULL , 10 );
234
+ mca_btl_smcuda_component .num_mem_nodes = strtoul (loc , NULL , 10 );
236
235
free (loc );
237
236
} else {
238
237
/* If we have hwloc support, then get accurate information */
239
238
loc = NULL ;
240
239
if (OPAL_SUCCESS == opal_hwloc_base_get_topology ()) {
241
- i = opal_hwloc_base_get_nbobjs_by_type (opal_hwloc_topology , HWLOC_OBJ_NODE , 0 ,
242
- OPAL_HWLOC_AVAILABLE );
240
+ rc = opal_hwloc_base_get_nbobjs_by_type (opal_hwloc_topology , HWLOC_OBJ_NODE , 0 ,
241
+ OPAL_HWLOC_AVAILABLE );
243
242
244
243
/* JMS This tells me how many numa nodes are *available*,
245
244
but it's not how many are being used *by this job*.
246
245
Note that this is the value we've previously used (from
247
246
the previous carto-based implementation), but it really
248
247
should be improved to be how many NUMA nodes are being
249
248
used *in this job*. */
250
- mca_btl_smcuda_component .num_mem_nodes = num_mem_nodes = i ;
249
+ mca_btl_smcuda_component .num_mem_nodes = rc ;
251
250
}
252
251
}
253
252
/* see if we were given our location */
254
253
OPAL_MODEX_RECV_VALUE_OPTIONAL (rc , PMIX_LOCALITY_STRING , & OPAL_PROC_MY_NAME , & loc , PMIX_STRING );
255
254
if (OPAL_SUCCESS == rc ) {
256
- if (NULL == loc ) {
257
- mca_btl_smcuda_component .mem_node = my_mem_node = -1 ;
258
- } else {
255
+ if (NULL != loc ) {
259
256
/* get our NUMA location */
260
257
mynuma = opal_hwloc_base_get_location (loc , HWLOC_OBJ_NODE , 0 );
261
258
if (NULL == mynuma || NULL != strchr (mynuma , ',' ) || NULL != strchr (mynuma , '-' )) {
262
259
/* we either have no idea what NUMA we are on, or we
263
260
* are on multiple NUMA nodes */
264
- mca_btl_smcuda_component .mem_node = my_mem_node = -1 ;
261
+ mca_btl_smcuda_component .mem_node = -1 ;
265
262
} else {
266
263
/* we are bound to a single NUMA node */
267
- my_mem_node = strtoul (mynuma , NULL , 10 );
268
- mca_btl_smcuda_component .mem_node = my_mem_node ;
264
+ mca_btl_smcuda_component .mem_node = strtoul (mynuma , NULL , 10 );
269
265
}
270
266
if (NULL != mynuma ) {
271
267
free (mynuma );
@@ -274,14 +270,14 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
274
270
}
275
271
} else {
276
272
/* If we have hwloc support, then get accurate information */
277
- if (OPAL_SUCCESS == opal_hwloc_base_get_topology () && num_mem_nodes > 0
273
+ if (OPAL_SUCCESS == opal_hwloc_base_get_topology () && mca_btl_smcuda_component . num_mem_nodes > 0
278
274
&& NULL != opal_process_info .cpuset ) {
279
275
int numa = 0 , w ;
280
276
unsigned n_bound = 0 ;
281
277
hwloc_obj_t obj ;
282
278
283
279
/* count the number of NUMA nodes to which we are bound */
284
- for (w = 0 ; w < i ; w ++ ) {
280
+ for (w = 0 ; w < mca_btl_smcuda_component . num_mem_nodes ; w ++ ) {
285
281
if (NULL
286
282
== (obj = opal_hwloc_base_get_obj_by_type (opal_hwloc_topology , HWLOC_OBJ_NODE ,
287
283
0 , w , OPAL_HWLOC_AVAILABLE ))) {
@@ -297,27 +293,35 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
297
293
* a NUMA we are on, then not much we can do
298
294
*/
299
295
if (1 == n_bound ) {
300
- mca_btl_smcuda_component .mem_node = my_mem_node = numa ;
301
- } else {
302
- mca_btl_smcuda_component .mem_node = my_mem_node = -1 ;
296
+ mca_btl_smcuda_component .mem_node = numa ;
303
297
}
304
298
}
305
299
}
300
+ /* sanity check: do we have the NUMA node info ? */
301
+ if ( mca_btl_smcuda_component .mem_node < 0 ||
302
+ mca_btl_smcuda_component .num_mem_nodes < 1 ) {
303
+ opal_output_verbose (10 , opal_btl_base_framework .framework_output ,
304
+ "btl:smcuda: %s unable to find topological information mem_node=%d, num_mem_nodes=%d" ,
305
+ OPAL_NAME_PRINT (OPAL_PROC_MY_NAME ),
306
+ mca_btl_smcuda_component .mem_node , mca_btl_smcuda_component .num_mem_nodes );
307
+ mca_btl_smcuda_component .mem_node = 0 ;
308
+ mca_btl_smcuda_component .num_mem_nodes = 1 ;
309
+ }
306
310
307
311
if (NULL == (res = calloc (1 , sizeof (* res )))) {
308
312
return OPAL_ERR_OUT_OF_RESOURCE ;
309
313
}
310
314
311
315
/* lookup shared memory pool */
312
316
mca_btl_smcuda_component .sm_mpools = (mca_mpool_base_module_t * * )
313
- calloc (num_mem_nodes , sizeof (mca_mpool_base_module_t * ));
317
+ calloc (mca_btl_smcuda_component . num_mem_nodes , sizeof (mca_mpool_base_module_t * ));
314
318
315
319
/* Disable memory binding, because each MPI process will claim pages in the
316
320
* mpool for their local NUMA node */
317
321
res -> mem_node = -1 ;
318
322
res -> allocator = mca_btl_smcuda_component .allocator ;
319
323
320
- if (OPAL_SUCCESS != (rc = setup_mpool_base_resources (m , res ))) {
324
+ if (OPAL_SUCCESS != (rc = setup_mpool_base_resources (& mca_btl_smcuda_component , res ))) {
321
325
free (res );
322
326
return rc ;
323
327
}
@@ -344,7 +348,7 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
344
348
345
349
/* remember that node rank zero is already attached */
346
350
if (0 != my_smp_rank ) {
347
- if (OPAL_SUCCESS != (rc = sm_segment_attach (m ))) {
351
+ if (OPAL_SUCCESS != (rc = sm_segment_attach (& mca_btl_smcuda_component ))) {
348
352
free (res );
349
353
return rc ;
350
354
}
@@ -357,7 +361,7 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
357
361
"btl:smcuda: host_register address=%p, size=%d" ,
358
362
mca_btl_smcuda_component .sm_mpool_base , (int ) res -> size );
359
363
if (0 != strcmp (opal_accelerator_base_selected_component .base_version .mca_component_name , "null" )) {
360
- rc = opal_accelerator .host_register (MCA_ACCELERATOR_NO_DEVICE_ID , mca_btl_smcuda_component .sm_mpool_base , res -> size );
364
+ rc = opal_accelerator .host_register (MCA_ACCELERATOR_NO_DEVICE_ID , mca_btl_smcuda_component .sm_mpool_base , res -> size );
361
365
if (OPAL_UNLIKELY (OPAL_SUCCESS != rc )) {
362
366
/* If registering the memory fails, print a message and continue.
363
367
* This is not a fatal error. */
@@ -394,7 +398,7 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
394
398
mca_btl_smcuda_component .shm_bases [mca_btl_smcuda_component .my_smp_rank ]
395
399
= (char * ) mca_btl_smcuda_component .sm_mpool_base ;
396
400
mca_btl_smcuda_component .shm_mem_nodes [mca_btl_smcuda_component .my_smp_rank ] = (uint16_t )
397
- my_mem_node ;
401
+ mca_btl_smcuda_component . mem_node ;
398
402
399
403
/* initialize the array of fifo's "owned" by this process */
400
404
if (NULL == (my_fifos = (sm_fifo_t * ) mpool_calloc (FIFO_MAP_NUM (n ), sizeof (sm_fifo_t ))))
@@ -420,45 +424,45 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int32_t my_s
420
424
/* allocation will be for the fragment descriptor and payload buffer */
421
425
length = sizeof (mca_btl_smcuda_frag1_t );
422
426
length_payload = sizeof (mca_btl_smcuda_hdr_t ) + mca_btl_smcuda_component .eager_limit ;
423
- i = opal_free_list_init (& mca_btl_smcuda_component .sm_frags_eager , length , opal_cache_line_size ,
424
- OBJ_CLASS (mca_btl_smcuda_frag1_t ), length_payload , opal_cache_line_size ,
425
- mca_btl_smcuda_component .sm_free_list_num ,
426
- mca_btl_smcuda_component .sm_free_list_max ,
427
- mca_btl_smcuda_component .sm_free_list_inc ,
428
- mca_btl_smcuda_component .sm_mpool , 0 , NULL , NULL , NULL );
429
- if (OPAL_SUCCESS != i )
430
- return i ;
427
+ rc = opal_free_list_init (& mca_btl_smcuda_component .sm_frags_eager , length , opal_cache_line_size ,
428
+ OBJ_CLASS (mca_btl_smcuda_frag1_t ), length_payload , opal_cache_line_size ,
429
+ mca_btl_smcuda_component .sm_free_list_num ,
430
+ mca_btl_smcuda_component .sm_free_list_max ,
431
+ mca_btl_smcuda_component .sm_free_list_inc ,
432
+ mca_btl_smcuda_component .sm_mpool , 0 , NULL , NULL , NULL );
433
+ if (OPAL_SUCCESS != rc )
434
+ return rc ;
431
435
432
436
length = sizeof (mca_btl_smcuda_frag2_t );
433
437
length_payload = sizeof (mca_btl_smcuda_hdr_t ) + mca_btl_smcuda_component .max_frag_size ;
434
- i = opal_free_list_init (& mca_btl_smcuda_component .sm_frags_max , length , opal_cache_line_size ,
435
- OBJ_CLASS (mca_btl_smcuda_frag2_t ), length_payload , opal_cache_line_size ,
436
- mca_btl_smcuda_component .sm_free_list_num ,
437
- mca_btl_smcuda_component .sm_free_list_max ,
438
- mca_btl_smcuda_component .sm_free_list_inc ,
439
- mca_btl_smcuda_component .sm_mpool , 0 , NULL , NULL , NULL );
440
- if (OPAL_SUCCESS != i )
441
- return i ;
442
-
443
- i = opal_free_list_init (& mca_btl_smcuda_component .sm_frags_user , sizeof (mca_btl_smcuda_user_t ),
444
- opal_cache_line_size , OBJ_CLASS (mca_btl_smcuda_user_t ),
445
- sizeof (mca_btl_smcuda_hdr_t ), opal_cache_line_size ,
446
- mca_btl_smcuda_component .sm_free_list_num ,
447
- mca_btl_smcuda_component .sm_free_list_max ,
448
- mca_btl_smcuda_component .sm_free_list_inc ,
449
- mca_btl_smcuda_component .sm_mpool , 0 , NULL , NULL , NULL );
450
- if (OPAL_SUCCESS != i )
451
- return i ;
438
+ rc = opal_free_list_init (& mca_btl_smcuda_component .sm_frags_max , length , opal_cache_line_size ,
439
+ OBJ_CLASS (mca_btl_smcuda_frag2_t ), length_payload , opal_cache_line_size ,
440
+ mca_btl_smcuda_component .sm_free_list_num ,
441
+ mca_btl_smcuda_component .sm_free_list_max ,
442
+ mca_btl_smcuda_component .sm_free_list_inc ,
443
+ mca_btl_smcuda_component .sm_mpool , 0 , NULL , NULL , NULL );
444
+ if (OPAL_SUCCESS != rc )
445
+ return rc ;
446
+
447
+ rc = opal_free_list_init (& mca_btl_smcuda_component .sm_frags_user , sizeof (mca_btl_smcuda_user_t ),
448
+ opal_cache_line_size , OBJ_CLASS (mca_btl_smcuda_user_t ),
449
+ sizeof (mca_btl_smcuda_hdr_t ), opal_cache_line_size ,
450
+ mca_btl_smcuda_component .sm_free_list_num ,
451
+ mca_btl_smcuda_component .sm_free_list_max ,
452
+ mca_btl_smcuda_component .sm_free_list_inc ,
453
+ mca_btl_smcuda_component .sm_mpool , 0 , NULL , NULL , NULL );
454
+ if (OPAL_SUCCESS != rc )
455
+ return rc ;
452
456
453
457
mca_btl_smcuda_component .num_outstanding_frags = 0 ;
454
458
455
459
mca_btl_smcuda_component .num_pending_sends = 0 ;
456
- i = opal_free_list_init (& mca_btl_smcuda_component .pending_send_fl ,
457
- sizeof (btl_smcuda_pending_send_item_t ), 8 ,
458
- OBJ_CLASS (opal_free_list_item_t ), 0 , 0 , 16 , -1 , 32 , NULL , 0 , NULL , NULL ,
459
- NULL );
460
- if (OPAL_SUCCESS != i )
461
- return i ;
460
+ rc = opal_free_list_init (& mca_btl_smcuda_component .pending_send_fl ,
461
+ sizeof (btl_smcuda_pending_send_item_t ), 8 ,
462
+ OBJ_CLASS (opal_free_list_item_t ), 0 , 0 , 16 , -1 , 32 , NULL , 0 , NULL , NULL ,
463
+ NULL );
464
+ if (OPAL_SUCCESS != rc )
465
+ return rc ;
462
466
463
467
/* set flag indicating btl has been inited */
464
468
smcuda_btl -> btl_inited = true;
0 commit comments