-
Notifications
You must be signed in to change notification settings - Fork 0
Topic/osc mt (modified) #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
@@ -29,6 +29,10 @@ typedef struct ucx_iovec { | |||
size_t len; | |||
} ucx_iovec_t; | |||
|
|||
OBJ_CLASS_INSTANCE(thread_local_info_t, opal_list_item_t, NULL, NULL); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Has to be in the common/ucx *.c file
- let's create constructor/destructor
@@ -29,6 +29,10 @@ typedef struct ucx_iovec { | |||
size_t len; | |||
} ucx_iovec_t; | |||
|
|||
OBJ_CLASS_INSTANCE(thread_local_info_t, opal_list_item_t, NULL, NULL); | |||
|
|||
pthread_key_t my_thread_key = {0}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- This has to be declared in common/ucx/ *.c file
- Use opal_common_ucx_ prefix
- extern declaration has to be in common/ucx
ret = opal_common_ucx_worker_flush(mca_osc_ucx_component.ucp_worker); | ||
if (ret != OMPI_SUCCESS) { | ||
return ret; | ||
} | ||
pthread_mutex_unlock(&mca_osc_ucx_component.worker_mutex); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Move this into the common code.
- Make this function inline and provide the parameter to select either EP or worker flush (to allow compiler static optimization
inline in func(int flag){
{
....
if( flag ){
do1;
} else {
do2;
}
....
}
....
func(0); // compiler will optimiza and remove if branch
@@ -35,6 +35,10 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in | |||
int flavor, int *model); | |||
static void ompi_osc_ucx_unregister_progress(void); | |||
|
|||
opal_list_t active_workers = {{0}}, idle_workers = {{0}}; | |||
pthread_mutex_t active_workers_mutex = PTHREAD_MUTEX_INITIALIZER; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Has to go to the common code
@@ -35,6 +35,10 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in | |||
int flavor, int *model); | |||
static void ompi_osc_ucx_unregister_progress(void); | |||
|
|||
opal_list_t active_workers = {{0}}, idle_workers = {{0}}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one also goes to the common code
@@ -323,7 +400,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in | |||
UCP_PARAM_FIELD_REQUEST_INIT | | |||
UCP_PARAM_FIELD_REQUEST_SIZE; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- have a hadler that describes UCX worker pool
- UCX context
- list of active/idle workers
- all required thread-level information.
handler = opal_common_ucx_workerpool_init(allgather_ptr, allgatherv_ptr);
opal_common_ucx_workerpool_init
- Initializes UCX: context creation
- create main worker
- exchange EP's
- records main thread
@@ -468,18 +523,20 @@ int ompi_osc_ucx_get(void *origin_addr, int origin_count, | |||
ompi_datatype_type_size(origin_dt, &origin_len); | |||
origin_len *= origin_count; | |||
|
|||
pthread_mutex_lock(mutex_ptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code block goes into the common code.
@@ -168,6 +207,7 @@ static inline int ddt_put_get(ompi_osc_ucx_module_t *module, | |||
curr_len = MIN(origin_ucx_iov[origin_ucx_iov_idx].len, | |||
target_ucx_iov[target_ucx_iov_idx].len); | |||
|
|||
pthread_mutex_lock(mutex_ptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- This block goes to common
- Make sure to always unlock (including the error path)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this has to be an inline function with argument put/get that will allow static compile optimization
@@ -208,6 +249,7 @@ static inline int ddt_put_get(ompi_osc_ucx_module_t *module, | |||
} else if (!is_origin_contig) { | |||
size_t prev_len = 0; | |||
while (origin_ucx_iov_idx < origin_ucx_iov_count) { | |||
pthread_mutex_lock(mutex_ptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- This block goes to common
- See above comment about locks
@@ -237,6 +280,7 @@ static inline int ddt_put_get(ompi_osc_ucx_module_t *module, | |||
} else { | |||
size_t prev_len = 0; | |||
while (target_ucx_iov_idx < target_ucx_iov_count) { | |||
pthread_mutex_lock(mutex_ptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- This block also goes to the common
- see above about locks
No description provided.