3
3
* Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
4
4
* University Research and Technology
5
5
* Corporation. All rights reserved.
6
- * Copyright (c) 2004-2009 The University of Tennessee and The University
6
+ * Copyright (c) 2004-2020 The University of Tennessee and The University
7
7
* of Tennessee Research Foundation. All rights
8
8
* reserved.
9
9
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
42
42
#include <sys/stat.h> /* for mkfifo */
43
43
#endif /* HAVE_SYS_STAT_H */
44
44
45
+ #include "opal/mca/hwloc/base/base.h"
45
46
#include "opal/mca/shmem/base/base.h"
46
47
#include "opal/mca/shmem/shmem.h"
47
48
#include "opal/util/bit_ops.h"
@@ -135,8 +136,15 @@ static inline unsigned int mca_btl_smcuda_param_register_uint(
135
136
return * storage ;
136
137
}
137
138
138
- static int mca_btl_smcuda_component_verify (void ) {
139
-
139
+ static int mca_btl_smcuda_component_verify (void )
140
+ {
141
+ /* We cannot support async memcpy right now */
142
+ if ( (mca_btl_smcuda .super .btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV ) ||
143
+ (mca_btl_smcuda .super .btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND ) ) {
144
+ opal_output_verbose (10 , opal_btl_base_framework .framework_output ,
145
+ "btl: smcuda: disable all asynchronous memcpy support" );
146
+ }
147
+ mca_btl_smcuda .super .btl_flags &= ~(MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV | MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND );
140
148
return mca_btl_base_param_verify (& mca_btl_smcuda .super );
141
149
}
142
150
@@ -679,20 +687,15 @@ static void mca_btl_smcuda_send_cuda_ipc_ack(struct mca_btl_base_module_t* btl,
679
687
* BTL. It handles smcuda specific control messages that are triggered
680
688
* when GPU memory transfers are initiated. */
681
689
static void btl_smcuda_control (mca_btl_base_module_t * btl ,
682
- mca_btl_base_tag_t tag ,
683
- mca_btl_base_descriptor_t * des , void * cbdata )
690
+ const mca_btl_base_receive_descriptor_t * descriptor )
684
691
{
685
692
int mydevnum , ipcaccess , res ;
686
693
ctrlhdr_t ctrlhdr ;
687
694
opal_proc_t * ep_proc ;
688
- struct mca_btl_base_endpoint_t * endpoint ;
689
695
mca_btl_smcuda_t * smcuda_btl = (mca_btl_smcuda_t * )btl ;
690
- mca_btl_smcuda_frag_t * frag = ( mca_btl_smcuda_frag_t * ) des ;
691
- mca_btl_base_segment_t * segments = des -> des_segments ;
696
+ const mca_btl_base_segment_t * segments = descriptor -> des_segments ;
697
+ struct mca_btl_base_endpoint_t * endpoint = descriptor -> endpoint ;
692
698
693
- /* Use the rank of the peer that sent the data to get to the endpoint
694
- * structure. This is needed for PML callback. */
695
- endpoint = mca_btl_smcuda_component .sm_peers [frag -> hdr -> my_smp_rank ];
696
699
ep_proc = endpoint -> proc_opal ;
697
700
698
701
/* Copy out control message payload to examine it */
@@ -764,7 +767,6 @@ static void btl_smcuda_control(mca_btl_base_module_t* btl,
764
767
}
765
768
}
766
769
767
- assert (endpoint -> peer_smp_rank == frag -> hdr -> my_smp_rank );
768
770
opal_output_verbose (10 , mca_btl_smcuda_component .cuda_ipc_output ,
769
771
"Analyzed CUDA IPC request: myrank=%d, mydev=%d, peerrank=%d, "
770
772
"peerdev=%d --> ACCESS=%d" ,
@@ -872,6 +874,13 @@ mca_btl_smcuda_component_init(int *num_btls,
872
874
* shared-memory segment. this routine sets component sm_max_procs. */
873
875
calc_sm_max_procs (num_local_procs );
874
876
877
+ /* Before we can safely create the backend file we need to know minimal
878
+ * information about the local node. We need at least the size of a cache line
879
+ * as we align the data in the backing file to it. The simplest way for now is
880
+ * to force the HWLOC initialization.
881
+ */
882
+ opal_hwloc_base_get_topology ();
883
+
875
884
/* This is where the modex will live some day. For now, just have local rank
876
885
* 0 create a rendezvous file containing the backing store info, so the
877
886
* other local procs can read from it during add_procs. The rest will just
@@ -999,7 +1008,6 @@ int mca_btl_smcuda_component_progress(void)
999
1008
/* local variables */
1000
1009
mca_btl_base_segment_t seg ;
1001
1010
mca_btl_smcuda_frag_t * frag ;
1002
- mca_btl_smcuda_frag_t Frag ;
1003
1011
sm_fifo_t * fifo = NULL ;
1004
1012
mca_btl_smcuda_hdr_t * hdr ;
1005
1013
int my_smp_rank = mca_btl_smcuda_component .my_smp_rank ;
@@ -1046,7 +1054,6 @@ int mca_btl_smcuda_component_progress(void)
1046
1054
switch (((uintptr_t )hdr ) & MCA_BTL_SMCUDA_FRAG_TYPE_MASK ) {
1047
1055
case MCA_BTL_SMCUDA_FRAG_SEND :
1048
1056
{
1049
- mca_btl_active_message_callback_t * reg ;
1050
1057
/* change the address from address relative to the shared
1051
1058
* memory address, to a true virtual address */
1052
1059
hdr = (mca_btl_smcuda_hdr_t * ) RELATIVE2VIRTUAL (hdr );
@@ -1058,17 +1065,16 @@ int mca_btl_smcuda_component_progress(void)
1058
1065
my_smp_rank , peer_smp_rank , j , FIFO_MAP (peer_smp_rank ));
1059
1066
}
1060
1067
#endif
1061
- /* recv upcall */
1062
- reg = mca_btl_base_active_message_trigger + hdr -> tag ;
1063
1068
seg .seg_addr .pval = ((char * )hdr ) + sizeof (mca_btl_smcuda_hdr_t );
1064
1069
seg .seg_len = hdr -> len ;
1065
- Frag .base .des_segment_count = 1 ;
1066
- Frag .base .des_segments = & seg ;
1067
- #if OPAL_CUDA_SUPPORT
1068
- Frag .hdr = hdr ; /* needed for peer rank in control messages */
1069
- #endif /* OPAL_CUDA_SUPPORT */
1070
- reg -> cbfunc (& mca_btl_smcuda .super , hdr -> tag , & (Frag .base ),
1071
- reg -> cbdata );
1070
+
1071
+ mca_btl_active_message_callback_t * reg = mca_btl_base_active_message_trigger + hdr -> tag ;
1072
+ mca_btl_base_receive_descriptor_t recv_desc = {.endpoint = mca_btl_smcuda_component .sm_peers [peer_smp_rank ],
1073
+ .des_segments = & seg ,
1074
+ .des_segment_count = 1 ,
1075
+ .tag = hdr -> tag ,
1076
+ .cbdata = reg -> cbdata };
1077
+ reg -> cbfunc (& mca_btl_smcuda .super , & recv_desc );
1072
1078
/* return the fragment */
1073
1079
MCA_BTL_SMCUDA_FIFO_WRITE (
1074
1080
mca_btl_smcuda_component .sm_peers [peer_smp_rank ],
@@ -1101,27 +1107,27 @@ int mca_btl_smcuda_component_progress(void)
1101
1107
}
1102
1108
goto recheck_peer ;
1103
1109
}
1104
- default :
1105
- /* unknown */
1106
- /*
1107
- * This code path should presumably never be called.
1108
- * It's unclear if it should exist or, if so, how it should be written.
1109
- * If we want to return it to the sending process,
1110
- * we have to figure out who the sender is.
1111
- * It seems we need to subtract the mask bits.
1112
- * Then, hopefully this is an sm header that has an smp_rank field.
1113
- * Presumably that means the received header was relative.
1114
- * Or, maybe this code should just be removed.
1115
- */
1116
- opal_output (0 , "mca_btl_smcuda_component_progress read an unknown type of header" );
1117
- hdr = (mca_btl_smcuda_hdr_t * ) RELATIVE2VIRTUAL (hdr );
1118
- peer_smp_rank = hdr -> my_smp_rank ;
1119
- hdr = (mca_btl_smcuda_hdr_t * )((uintptr_t )hdr -> frag |
1120
- MCA_BTL_SMCUDA_FRAG_STATUS_MASK );
1121
- MCA_BTL_SMCUDA_FIFO_WRITE (
1122
- mca_btl_smcuda_component .sm_peers [peer_smp_rank ],
1123
- my_smp_rank , peer_smp_rank , hdr , false, true, rc );
1124
- break ;
1110
+ default :
1111
+ /* unknown */
1112
+ /*
1113
+ * This code path should presumably never be called.
1114
+ * It's unclear if it should exist or, if so, how it should be written.
1115
+ * If we want to return it to the sending process,
1116
+ * we have to figure out who the sender is.
1117
+ * It seems we need to subtract the mask bits.
1118
+ * Then, hopefully this is an sm header that has an smp_rank field.
1119
+ * Presumably that means the received header was relative.
1120
+ * Or, maybe this code should just be removed.
1121
+ */
1122
+ opal_output (0 , "mca_btl_smcuda_component_progress read an unknown type of header" );
1123
+ hdr = (mca_btl_smcuda_hdr_t * ) RELATIVE2VIRTUAL (hdr );
1124
+ peer_smp_rank = hdr -> my_smp_rank ;
1125
+ hdr = (mca_btl_smcuda_hdr_t * )((uintptr_t )hdr -> frag |
1126
+ MCA_BTL_SMCUDA_FRAG_STATUS_MASK );
1127
+ MCA_BTL_SMCUDA_FIFO_WRITE (
1128
+ mca_btl_smcuda_component .sm_peers [peer_smp_rank ],
1129
+ my_smp_rank , peer_smp_rank , hdr , false, true, rc );
1130
+ break ;
1125
1131
}
1126
1132
}
1127
1133
(void )rc ; /* this is safe to ignore as the message is requeued till success */
0 commit comments