@@ -325,18 +325,18 @@ static inline int end_atomicity(
325
325
}
326
326
327
327
static inline int get_dynamic_win_info (uint64_t remote_addr , ompi_osc_ucx_module_t * module ,
328
- int target ) {
328
+ int target , int * win_idx ) {
329
329
uint64_t remote_state_addr = (module -> state_addrs )[target ] + OSC_UCX_STATE_DYNAMIC_WIN_CNT_OFFSET ;
330
- size_t len = sizeof (uint64_t ) + sizeof (ompi_osc_dynamic_win_info_t ) * OMPI_OSC_UCX_ATTACH_MAX ;
331
- char * temp_buf = malloc ( len );
330
+ size_t remote_state_len = sizeof (uint64_t ) + sizeof (ompi_osc_dynamic_win_info_t ) * OMPI_OSC_UCX_ATTACH_MAX ;
331
+ char * temp_buf = calloc ( remote_state_len , 1 );
332
332
ompi_osc_dynamic_win_info_t * temp_dynamic_wins ;
333
333
uint64_t win_count ;
334
- int contain , insert = -1 ;
334
+ int insert = -1 ;
335
335
int ret ;
336
336
337
337
ret = opal_common_ucx_wpmem_putget (module -> state_mem , OPAL_COMMON_UCX_GET , target ,
338
338
(void * )((intptr_t )temp_buf ),
339
- len , remote_state_addr );
339
+ remote_state_len , remote_state_addr );
340
340
if (OPAL_SUCCESS != ret ) {
341
341
OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_putget failed: %d" , ret );
342
342
ret = OMPI_ERROR ;
@@ -350,23 +350,27 @@ static inline int get_dynamic_win_info(uint64_t remote_addr, ompi_osc_ucx_module
350
350
}
351
351
352
352
memcpy (& win_count , temp_buf , sizeof (uint64_t ));
353
- assert (win_count > 0 && win_count <= OMPI_OSC_UCX_ATTACH_MAX );
353
+ if (win_count > OMPI_OSC_UCX_ATTACH_MAX ) {
354
+ return MPI_ERR_RMA_RANGE ;
355
+ }
354
356
355
357
temp_dynamic_wins = (ompi_osc_dynamic_win_info_t * )(temp_buf + sizeof (uint64_t ));
356
- contain = ompi_osc_find_attached_region_position (temp_dynamic_wins , 0 , win_count ,
358
+ * win_idx = ompi_osc_find_attached_region_position (temp_dynamic_wins , 0 , win_count ,
357
359
remote_addr , 1 , & insert );
358
- assert (contain >= 0 && (uint64_t )contain < win_count );
360
+ if (* win_idx < 0 || (uint64_t )* win_idx >= win_count ) {
361
+ return MPI_ERR_RMA_RANGE ;
362
+ }
359
363
360
- if (module -> local_dynamic_win_info [contain ].mem -> mem_addrs == NULL ) {
361
- module -> local_dynamic_win_info [contain ].mem -> mem_addrs = calloc (ompi_comm_size (module -> comm ),
364
+ if (module -> local_dynamic_win_info [* win_idx ].mem -> mem_addrs == NULL ) {
365
+ module -> local_dynamic_win_info [* win_idx ].mem -> mem_addrs = calloc (ompi_comm_size (module -> comm ),
362
366
OMPI_OSC_UCX_MEM_ADDR_MAX_LEN );
363
- module -> local_dynamic_win_info [contain ].mem -> mem_displs = calloc (ompi_comm_size (module -> comm ),
367
+ module -> local_dynamic_win_info [* win_idx ].mem -> mem_displs = calloc (ompi_comm_size (module -> comm ),
364
368
sizeof (int ));
365
369
}
366
370
367
- memcpy (module -> local_dynamic_win_info [contain ].mem -> mem_addrs + target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN ,
368
- temp_dynamic_wins [contain ].mem_addr , OMPI_OSC_UCX_MEM_ADDR_MAX_LEN );
369
- module -> local_dynamic_win_info [contain ].mem -> mem_displs [target ] = target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN ;
371
+ memcpy (module -> local_dynamic_win_info [* win_idx ].mem -> mem_addrs + target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN ,
372
+ temp_dynamic_wins [* win_idx ].mem_addr , OMPI_OSC_UCX_MEM_ADDR_MAX_LEN );
373
+ module -> local_dynamic_win_info [* win_idx ].mem -> mem_displs [target ] = target * OMPI_OSC_UCX_MEM_ADDR_MAX_LEN ;
370
374
371
375
cleanup :
372
376
free (temp_buf );
@@ -416,17 +420,20 @@ static int do_atomic_op_intrinsic(
416
420
void * result_addr ,
417
421
ompi_osc_ucx_request_t * ucx_req )
418
422
{
419
- int ret = OMPI_SUCCESS ;
423
+ int ret = OMPI_SUCCESS ,
424
+ win_idx = -1 ;
420
425
size_t origin_dt_bytes ;
426
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
421
427
ompi_datatype_type_size (dt , & origin_dt_bytes );
422
428
423
429
uint64_t remote_addr = (module -> addrs [target ]) + target_disp * OSC_UCX_GET_DISP (module , target );
424
430
425
431
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
426
- ret = get_dynamic_win_info (remote_addr , module , target );
432
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
427
433
if (ret != OMPI_SUCCESS ) {
428
434
return ret ;
429
435
}
436
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
430
437
}
431
438
432
439
ucp_atomic_fetch_op_t opcode ;
@@ -454,7 +461,7 @@ static int do_atomic_op_intrinsic(
454
461
user_req_ptr = ucx_req ;
455
462
// issue a fence if this is the last but not the only element
456
463
if (0 < i ) {
457
- ret = opal_common_ucx_wpmem_fence (module -> mem );
464
+ ret = opal_common_ucx_wpmem_fence (mem );
458
465
if (ret != OMPI_SUCCESS ) {
459
466
OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
460
467
return OMPI_ERROR ;
@@ -466,7 +473,7 @@ static int do_atomic_op_intrinsic(
466
473
} else {
467
474
value = opal_common_ucx_load_uint64 (origin_addr , origin_dt_bytes );
468
475
}
469
- ret = opal_common_ucx_wpmem_fetch_nb (module -> mem , opcode , value , target ,
476
+ ret = opal_common_ucx_wpmem_fetch_nb (mem , opcode , value , target ,
470
477
output_addr , origin_dt_bytes , remote_addr ,
471
478
user_req_cb , user_req_ptr );
472
479
@@ -485,21 +492,23 @@ int ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_data
485
492
int target , ptrdiff_t target_disp , int target_count ,
486
493
struct ompi_datatype_t * target_dt , struct ompi_win_t * win ) {
487
494
ompi_osc_ucx_module_t * module = (ompi_osc_ucx_module_t * ) win -> w_osc_module ;
495
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
488
496
uint64_t remote_addr = (module -> addrs [target ]) + target_disp * OSC_UCX_GET_DISP (module , target );
489
497
bool is_origin_contig = false, is_target_contig = false;
490
498
ptrdiff_t origin_lb , origin_extent , target_lb , target_extent ;
491
- int ret = OMPI_SUCCESS ;
499
+ int ret = OMPI_SUCCESS , win_idx = -1 ;
492
500
493
501
ret = check_sync_state (module , target , false);
494
502
if (ret != OMPI_SUCCESS ) {
495
503
return ret ;
496
504
}
497
505
498
506
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
499
- ret = get_dynamic_win_info (remote_addr , module , target );
507
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
500
508
if (ret != OMPI_SUCCESS ) {
501
509
return ret ;
502
510
}
511
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
503
512
}
504
513
505
514
if (!target_count ) {
@@ -519,7 +528,7 @@ int ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_data
519
528
ompi_datatype_type_size (origin_dt , & origin_len );
520
529
origin_len *= origin_count ;
521
530
522
- ret = opal_common_ucx_wpmem_putget (module -> mem , OPAL_COMMON_UCX_PUT , target ,
531
+ ret = opal_common_ucx_wpmem_putget (mem , OPAL_COMMON_UCX_PUT , target ,
523
532
(void * )((intptr_t )origin_addr + origin_lb ),
524
533
origin_len , remote_addr + target_lb );
525
534
if (OPAL_SUCCESS != ret ) {
@@ -539,21 +548,23 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count,
539
548
int target , ptrdiff_t target_disp , int target_count ,
540
549
struct ompi_datatype_t * target_dt , struct ompi_win_t * win ) {
541
550
ompi_osc_ucx_module_t * module = (ompi_osc_ucx_module_t * ) win -> w_osc_module ;
551
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
542
552
uint64_t remote_addr = (module -> addrs [target ]) + target_disp * OSC_UCX_GET_DISP (module , target );
543
553
ptrdiff_t origin_lb , origin_extent , target_lb , target_extent ;
544
554
bool is_origin_contig = false, is_target_contig = false;
545
- int ret = OMPI_SUCCESS ;
555
+ int ret = OMPI_SUCCESS , win_idx = -1 ;
546
556
547
557
ret = check_sync_state (module , target , false);
548
558
if (ret != OMPI_SUCCESS ) {
549
559
return ret ;
550
560
}
551
561
552
562
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
553
- ret = get_dynamic_win_info (remote_addr , module , target );
563
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
554
564
if (ret != OMPI_SUCCESS ) {
555
565
return ret ;
556
566
}
567
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
557
568
}
558
569
559
570
if (!target_count ) {
@@ -574,7 +585,7 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count,
574
585
ompi_datatype_type_size (origin_dt , & origin_len );
575
586
origin_len *= origin_count ;
576
587
577
- ret = opal_common_ucx_wpmem_putget (module -> mem , OPAL_COMMON_UCX_GET , target ,
588
+ ret = opal_common_ucx_wpmem_putget (mem , OPAL_COMMON_UCX_GET , target ,
578
589
(void * )((intptr_t )origin_addr + origin_lb ),
579
590
origin_len , remote_addr + target_lb );
580
591
if (OPAL_SUCCESS != ret ) {
@@ -771,9 +782,10 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a
771
782
int target , ptrdiff_t target_disp ,
772
783
struct ompi_win_t * win ) {
773
784
ompi_osc_ucx_module_t * module = (ompi_osc_ucx_module_t * )win -> w_osc_module ;
785
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
774
786
uint64_t remote_addr = (module -> addrs [target ]) + target_disp * OSC_UCX_GET_DISP (module , target );
775
787
size_t dt_bytes ;
776
- int ret = OMPI_SUCCESS ;
788
+ int ret = OMPI_SUCCESS , win_idx = -1 ;
777
789
bool lock_acquired = false;
778
790
779
791
ret = check_sync_state (module , target , false);
@@ -782,10 +794,11 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a
782
794
}
783
795
784
796
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
785
- ret = get_dynamic_win_info (remote_addr , module , target );
797
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
786
798
if (ret != OMPI_SUCCESS ) {
787
799
return ret ;
788
800
}
801
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
789
802
}
790
803
791
804
ompi_datatype_type_size (dt , & dt_bytes );
@@ -803,21 +816,21 @@ int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_a
803
816
return ret ;
804
817
}
805
818
806
- ret = opal_common_ucx_wpmem_putget (module -> mem , OPAL_COMMON_UCX_GET , target ,
819
+ ret = opal_common_ucx_wpmem_putget (mem , OPAL_COMMON_UCX_GET , target ,
807
820
& result_addr , dt_bytes , remote_addr );
808
821
if (OPAL_SUCCESS != ret ) {
809
822
OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_putget failed: %d" , ret );
810
823
return OMPI_ERROR ;
811
824
}
812
825
813
- ret = opal_common_ucx_wpmem_flush (module -> mem , OPAL_COMMON_UCX_SCOPE_EP , target );
826
+ ret = opal_common_ucx_wpmem_flush (mem , OPAL_COMMON_UCX_SCOPE_EP , target );
814
827
if (ret != OPAL_SUCCESS ) {
815
828
return ret ;
816
829
}
817
830
818
831
if (0 == memcmp (result_addr , compare_addr , dt_bytes )) {
819
832
// write the new value
820
- ret = opal_common_ucx_wpmem_putget (module -> mem , OPAL_COMMON_UCX_PUT , target ,
833
+ ret = opal_common_ucx_wpmem_putget (mem , OPAL_COMMON_UCX_PUT , target ,
821
834
(void * )origin_addr , dt_bytes , remote_addr );
822
835
if (OPAL_SUCCESS != ret ) {
823
836
OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_putget failed: %d" , ret );
@@ -834,7 +847,8 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr,
834
847
struct ompi_win_t * win ) {
835
848
size_t dt_bytes ;
836
849
ompi_osc_ucx_module_t * module = (ompi_osc_ucx_module_t * ) win -> w_osc_module ;
837
- int ret = OMPI_SUCCESS ;
850
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
851
+ int ret = OMPI_SUCCESS , win_idx = -1 ;
838
852
839
853
ret = check_sync_state (module , target , false);
840
854
if (ret != OMPI_SUCCESS ) {
@@ -860,10 +874,11 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr,
860
874
}
861
875
862
876
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
863
- ret = get_dynamic_win_info (remote_addr , module , target );
877
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
864
878
if (ret != OMPI_SUCCESS ) {
865
879
return ret ;
866
880
}
881
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
867
882
}
868
883
869
884
value = origin_addr ? opal_common_ucx_load_uint64 (origin_addr , dt_bytes ) : 0 ;
@@ -877,7 +892,7 @@ int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr,
877
892
}
878
893
}
879
894
880
- ret = opal_common_ucx_wpmem_fetch_nb (module -> mem , opcode , value , target ,
895
+ ret = opal_common_ucx_wpmem_fetch_nb (mem , opcode , value , target ,
881
896
(void * )result_addr , dt_bytes ,
882
897
remote_addr , NULL , NULL );
883
898
@@ -1049,20 +1064,22 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
1049
1064
struct ompi_datatype_t * target_dt ,
1050
1065
struct ompi_win_t * win , struct ompi_request_t * * request ) {
1051
1066
ompi_osc_ucx_module_t * module = (ompi_osc_ucx_module_t * ) win -> w_osc_module ;
1067
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
1052
1068
uint64_t remote_addr = (module -> addrs [target ]) + target_disp * OSC_UCX_GET_DISP (module , target );
1053
1069
ompi_osc_ucx_request_t * ucx_req = NULL ;
1054
- int ret = OMPI_SUCCESS ;
1070
+ int ret = OMPI_SUCCESS , win_idx = -1 ;
1055
1071
1056
1072
ret = check_sync_state (module , target , true);
1057
1073
if (ret != OMPI_SUCCESS ) {
1058
1074
return ret ;
1059
1075
}
1060
1076
1061
1077
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
1062
- ret = get_dynamic_win_info (remote_addr , module , target );
1078
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
1063
1079
if (ret != OMPI_SUCCESS ) {
1064
1080
return ret ;
1065
1081
}
1082
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
1066
1083
}
1067
1084
1068
1085
OMPI_OSC_UCX_REQUEST_ALLOC (win , ucx_req );
@@ -1074,15 +1091,15 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
1074
1091
return ret ;
1075
1092
}
1076
1093
1077
- ret = opal_common_ucx_wpmem_fence (module -> mem );
1094
+ ret = opal_common_ucx_wpmem_fence (mem );
1078
1095
if (ret != OMPI_SUCCESS ) {
1079
1096
OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1080
1097
return OMPI_ERROR ;
1081
1098
}
1082
1099
1083
1100
mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1084
1101
/* TODO: investigate whether ucp_worker_flush_nb is a better choice here */
1085
- ret = opal_common_ucx_wpmem_fetch_nb (module -> mem , UCP_ATOMIC_FETCH_OP_FADD ,
1102
+ ret = opal_common_ucx_wpmem_fetch_nb (mem , UCP_ATOMIC_FETCH_OP_FADD ,
1086
1103
0 , target , & (module -> req_result ),
1087
1104
sizeof (uint64_t ), remote_addr & (~0x7 ),
1088
1105
req_completion , ucx_req );
@@ -1102,20 +1119,22 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
1102
1119
struct ompi_datatype_t * target_dt , struct ompi_win_t * win ,
1103
1120
struct ompi_request_t * * request ) {
1104
1121
ompi_osc_ucx_module_t * module = (ompi_osc_ucx_module_t * ) win -> w_osc_module ;
1122
+ opal_common_ucx_wpmem_t * mem = module -> mem ;
1105
1123
uint64_t remote_addr = (module -> addrs [target ]) + target_disp * OSC_UCX_GET_DISP (module , target );
1106
1124
ompi_osc_ucx_request_t * ucx_req = NULL ;
1107
- int ret = OMPI_SUCCESS ;
1125
+ int ret = OMPI_SUCCESS , win_idx = -1 ;
1108
1126
1109
1127
ret = check_sync_state (module , target , true);
1110
1128
if (ret != OMPI_SUCCESS ) {
1111
1129
return ret ;
1112
1130
}
1113
1131
1114
1132
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
1115
- ret = get_dynamic_win_info (remote_addr , module , target );
1133
+ ret = get_dynamic_win_info (remote_addr , module , target , & win_idx );
1116
1134
if (ret != OMPI_SUCCESS ) {
1117
1135
return ret ;
1118
1136
}
1137
+ mem = module -> local_dynamic_win_info [win_idx ].mem ;
1119
1138
}
1120
1139
1121
1140
OMPI_OSC_UCX_REQUEST_ALLOC (win , ucx_req );
@@ -1127,15 +1146,15 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
1127
1146
return ret ;
1128
1147
}
1129
1148
1130
- ret = opal_common_ucx_wpmem_fence (module -> mem );
1149
+ ret = opal_common_ucx_wpmem_fence (mem );
1131
1150
if (ret != OMPI_SUCCESS ) {
1132
1151
OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1133
1152
return OMPI_ERROR ;
1134
1153
}
1135
1154
1136
1155
mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1137
1156
/* TODO: investigate whether ucp_worker_flush_nb is a better choice here */
1138
- ret = opal_common_ucx_wpmem_fetch_nb (module -> mem , UCP_ATOMIC_FETCH_OP_FADD ,
1157
+ ret = opal_common_ucx_wpmem_fetch_nb (mem , UCP_ATOMIC_FETCH_OP_FADD ,
1139
1158
0 , target , & (module -> req_result ),
1140
1159
sizeof (uint64_t ), remote_addr & (~0x7 ),
1141
1160
req_completion , ucx_req );
0 commit comments