19
19
* Copyright (C) 2018 Mellanox Technologies, Ltd.
20
20
* All rights reserved.
21
21
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
22
- * Copyright (c) 2019 IBM Corporation. All rights reserved.
22
+ * Copyright (c) 2019-2021 IBM Corporation. All rights reserved.
23
23
* Copyright (c) 2019-2020 Inria. All rights reserved.
24
24
* $COPYRIGHT$
25
25
*
@@ -294,6 +294,18 @@ int opal_hwloc_base_get_topology(void)
294
294
wildcard_rank .jobid = OPAL_PROC_MY_NAME .jobid ;
295
295
wildcard_rank .vpid = OPAL_VPID_WILDCARD ;
296
296
297
+ // Did the user ask for a topology file via an MCA parameter?
298
+ // Check this first, before the shared-memory lookup and local discovery below.
299
+ if (NULL != opal_hwloc_base_topo_file ) {
300
+ opal_output_verbose (1 , opal_hwloc_base_framework .framework_output ,
301
+ "hwloc:base loading topology from file %s" ,
302
+ opal_hwloc_base_topo_file );
303
+ if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology (opal_hwloc_base_topo_file ))) {
304
+ return rc ;
305
+ }
306
+ goto done ;
307
+ }
308
+
297
309
#if HWLOC_API_VERSION >= 0x20000
298
310
opal_output_verbose (2 , opal_hwloc_base_framework .framework_output ,
299
311
"hwloc:base: looking for topology in shared memory" );
@@ -337,7 +349,7 @@ int opal_hwloc_base_get_topology(void)
337
349
opal_output_verbose (2 , opal_hwloc_base_framework .framework_output ,
338
350
"hwloc:base: topology in shared memory" );
339
351
topo_in_shmem = true;
340
- return OPAL_SUCCESS ;
352
+ goto done ;
341
353
}
342
354
}
343
355
#endif
@@ -394,7 +406,7 @@ int opal_hwloc_base_get_topology(void)
394
406
hwloc_topology_destroy (opal_hwloc_topology );
395
407
return rc ;
396
408
}
397
- } else if ( NULL == opal_hwloc_base_topo_file ) {
409
+ } else {
398
410
opal_output_verbose (1 , opal_hwloc_base_framework .framework_output ,
399
411
"hwloc:base discovering topology" );
400
412
if (0 != hwloc_topology_init (& opal_hwloc_topology ) ||
@@ -408,15 +420,10 @@ int opal_hwloc_base_get_topology(void)
408
420
hwloc_topology_destroy (opal_hwloc_topology );
409
421
return rc ;
410
422
}
411
- } else {
412
- opal_output_verbose (1 , opal_hwloc_base_framework .framework_output ,
413
- "hwloc:base loading topology from file %s" ,
414
- opal_hwloc_base_topo_file );
415
- if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology (opal_hwloc_base_topo_file ))) {
416
- return rc ;
417
- }
418
423
}
419
424
425
+ done :
426
+
420
427
/* fill opal_cache_line_size global with the smallest L1 cache
421
428
line size */
422
429
fill_cache_line_size ();
@@ -659,9 +666,11 @@ static hwloc_obj_t df_search(hwloc_topology_t topo,
659
666
// available = opal_hwloc_base_get_available_cpus(topo, start)
660
667
// and skipped objs that had hwloc_bitmap_iszero(available)
661
668
hwloc_obj_t root ;
662
- opal_hwloc_topo_data_t * rdata ;
669
+ opal_hwloc_topo_data_t * rdata = NULL ;
663
670
root = hwloc_get_root_obj (topo );
664
- rdata = (opal_hwloc_topo_data_t * )root -> userdata ;
671
+ if (false == topo_in_shmem ) {
672
+ rdata = (opal_hwloc_topo_data_t * )root -> userdata ;
673
+ }
665
674
hwloc_cpuset_t constrained_cpuset ;
666
675
667
676
constrained_cpuset = hwloc_bitmap_alloc ();
@@ -696,7 +705,7 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
696
705
unsigned int num_objs ;
697
706
hwloc_obj_t obj ;
698
707
opal_hwloc_summary_t * sum ;
699
- opal_hwloc_topo_data_t * data ;
708
+ opal_hwloc_topo_data_t * data = NULL ;
700
709
int rc ;
701
710
702
711
/* bozo check */
@@ -728,10 +737,17 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
728
737
obj = hwloc_get_root_obj (topo );
729
738
730
739
/* first see if the topology already has this summary */
731
- data = (opal_hwloc_topo_data_t * )obj -> userdata ;
740
+ if (false == topo_in_shmem ) {
741
+ data = (opal_hwloc_topo_data_t * )obj -> userdata ;
742
+ }
732
743
if (NULL == data ) {
733
744
data = OBJ_NEW (opal_hwloc_topo_data_t );
734
- obj -> userdata = (void * )data ;
745
+ if (false == topo_in_shmem ) {
746
+ // Can't touch userdata if in read-only shmem!
747
+ // We have to protect here for the case where obj->userdata
748
+ // is in shmem and it is NULL.
749
+ obj -> userdata = (void * ) data ;
750
+ }
735
751
} else {
736
752
OPAL_LIST_FOREACH (sum , & data -> summaries , opal_hwloc_summary_t ) {
737
753
if (target == sum -> type &&
@@ -1167,8 +1183,6 @@ int opal_hwloc_base_cset2str(char *str, int len,
1167
1183
char tmp [BUFSIZ ];
1168
1184
const int stmp = sizeof (tmp ) - 1 ;
1169
1185
int * * map = NULL ;
1170
- hwloc_obj_t root ;
1171
- opal_hwloc_topo_data_t * sum ;
1172
1186
1173
1187
str [0 ] = tmp [stmp ] = '\0' ;
1174
1188
@@ -1177,18 +1191,6 @@ int opal_hwloc_base_cset2str(char *str, int len,
1177
1191
return OPAL_ERR_NOT_BOUND ;
1178
1192
}
1179
1193
1180
- /* if the cpuset includes all available cpus, then we are unbound */
1181
- root = hwloc_get_root_obj (topo );
1182
- if (NULL != root -> userdata ) {
1183
- sum = (opal_hwloc_topo_data_t * )root -> userdata ;
1184
- if (NULL == sum -> available ) {
1185
- return OPAL_ERROR ;
1186
- }
1187
- if (0 != hwloc_bitmap_isincluded (sum -> available , cpuset )) {
1188
- return OPAL_ERR_NOT_BOUND ;
1189
- }
1190
- }
1191
-
1192
1194
if (OPAL_SUCCESS != (ret = build_map (& num_sockets , & num_cores , cpuset , & map , topo ))) {
1193
1195
return ret ;
1194
1196
}
@@ -1235,8 +1237,6 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
1235
1237
int core_index , pu_index ;
1236
1238
const int stmp = sizeof (tmp ) - 1 ;
1237
1239
hwloc_obj_t socket , core , pu ;
1238
- hwloc_obj_t root ;
1239
- opal_hwloc_topo_data_t * sum ;
1240
1240
1241
1241
str [0 ] = tmp [stmp ] = '\0' ;
1242
1242
@@ -1245,18 +1245,6 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
1245
1245
return OPAL_ERR_NOT_BOUND ;
1246
1246
}
1247
1247
1248
- /* if the cpuset includes all available cpus, then we are unbound */
1249
- root = hwloc_get_root_obj (topo );
1250
- if (NULL != root -> userdata ) {
1251
- sum = (opal_hwloc_topo_data_t * )root -> userdata ;
1252
- if (NULL == sum -> available ) {
1253
- return OPAL_ERROR ;
1254
- }
1255
- if (0 != hwloc_bitmap_isincluded (sum -> available , cpuset )) {
1256
- return OPAL_ERR_NOT_BOUND ;
1257
- }
1258
- }
1259
-
1260
1248
/* Iterate over all existing sockets */
1261
1249
for (socket = hwloc_get_obj_by_type (topo , HWLOC_OBJ_SOCKET , 0 );
1262
1250
NULL != socket ;
0 commit comments