Skip to content

Commit 71eee8e

Browse files
wfaderhold21 (ferrol aderholdt) and ferrol aderholdt authored
TEST: adds onesided alltoall test (#374) (#395)
* TEST: add onesided alltoall tests
* TEST: initialize global work buffer
* CODESTYLE: fix code style
* REVIEW: address feedback
* REVIEW: address feedback
* API: require work buffer initialization
* TL/UCP: update onesided a2a to match API
* TEST: update test to match API

Co-authored-by: ferrol aderholdt <faderholdt@nvidia.com>
Co-authored-by: valentin petrov <valentinp@nvidia.com>
(cherry picked from commit bbb0944)

Co-authored-by: ferrol aderholdt <faderholdt@nvidia.com>
1 parent 489f95f commit 71eee8e

File tree

6 files changed

+199
-57
lines changed

6 files changed

+199
-57
lines changed

src/components/tl/ucp/alltoall/alltoall_onesided.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_progress(ucc_coll_task_t *ctask)
6262
ucc_rank_t gsize = UCC_TL_TEAM_SIZE(team);
6363
long * pSync = TASK_ARGS(task).global_work_buffer;
6464

65-
if (*pSync < gsize - 1 || task->send_completed < task->send_posted) {
65+
if (*pSync < gsize || task->send_completed < task->send_posted) {
6666
ucp_worker_progress(UCC_TL_UCP_TEAM_CTX(team)->ucp_worker);
6767
return UCC_INPROGRESS;
6868
}
6969

70-
*pSync = -1;
70+
pSync[0] = 0;
7171
task->super.super.status = UCC_OK;
7272
ucc_task_complete(ctask);
7373
return task->super.super.status;

src/ucc/api/ucc.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,15 +1848,16 @@ typedef struct ucc_coll_args {
18481848
collectives */
18491849
ucc_error_type_t error_type; /*!< Error type */
18501850
ucc_coll_id_t tag; /*!< Used for ordering collectives */
1851-
void *global_work_buffer; /*!< User allocated scratchpad
1852-
buffer for one-sided
1853-
collectives. The buffer
1854-
provided should be at least
1855-
the size returned by @ref
1856-
ucc_context_get_attr with
1857-
the field mask -
1858-
UCC_CONTEXT_ATTR_FIELD_WORK_BUFFER_SIZE
1859-
set to 1. */
1851+
void *global_work_buffer; /*!< User allocated scratchpad
1852+
buffer for one-sided
1853+
collectives. The buffer
1854+
provided should be at least
1855+
the size returned by @ref
1856+
ucc_context_get_attr with
1857+
the field mask -
1858+
UCC_CONTEXT_ATTR_FIELD_WORK_BUFFER_SIZE
1859+
set to 1. The buffer must be initialized
1860+
to 0. */
18601861
ucc_coll_callback_t cb;
18611862
double timeout; /*!< Timeout in seconds */
18621863
} ucc_coll_args_t;

test/gtest/common/test_ucc.cc

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ UccProcess::~UccProcess()
4141
{
4242
EXPECT_EQ(UCC_OK, ucc_context_destroy(ctx_h));
4343
EXPECT_EQ(UCC_OK, ucc_finalize(lib_h));
44+
if (ctx_params.mask & UCC_CONTEXT_PARAM_FIELD_MEM_PARAMS) {
45+
for (auto i = 0; i < UCC_TEST_N_MEM_SEGMENTS; i++) {
46+
ucc_free(onesided_buf[i]);
47+
}
48+
}
4449
}
4550

4651
ucc_status_t UccTeam::allgather(void *src_buf, void *recv_buf, size_t size,
@@ -157,7 +162,8 @@ uint64_t rank_map_cb(uint64_t ep, void *cb_ctx) {
157162
return (uint64_t)team->procs[(int)ep].p.get()->job_rank;
158163
}
159164

160-
void UccTeam::init_team(bool use_team_ep_map, bool use_ep_range)
165+
void UccTeam::init_team(bool use_team_ep_map, bool use_ep_range,
166+
bool is_onesided)
161167
{
162168
ucc_team_params_t team_params;
163169
std::vector<allgather_coll_info_t *> cis;
@@ -189,6 +195,10 @@ void UccTeam::init_team(bool use_team_ep_map, bool use_ep_range)
189195
team_params.oob.oob_ep = i;
190196
team_params.mask |= UCC_TEAM_PARAM_FIELD_OOB;
191197
}
198+
if (is_onesided) {
199+
team_params.mask |= UCC_TEAM_PARAM_FIELD_FLAGS;
200+
team_params.flags = UCC_TEAM_FLAG_COLL_WORK_BUFFER;
201+
}
192202
EXPECT_EQ(UCC_OK,
193203
ucc_team_create_post(&(procs[i].p.get()->ctx_h), 1, &team_params,
194204
&(procs[i].team)));
@@ -211,7 +221,6 @@ void UccTeam::init_team(bool use_team_ep_map, bool use_ep_range)
211221
}
212222
}
213223

214-
215224
void UccTeam::destroy_team()
216225
{
217226
ucc_status_t status;
@@ -241,7 +250,7 @@ void UccTeam::progress()
241250
}
242251

243252
UccTeam::UccTeam(std::vector<UccProcess_h> &_procs, bool use_team_ep_map,
244-
bool use_ep_range)
253+
bool use_ep_range, bool is_onesided)
245254
{
246255
n_procs = _procs.size();
247256
ag.resize(n_procs);
@@ -252,7 +261,7 @@ UccTeam::UccTeam(std::vector<UccProcess_h> &_procs, bool use_team_ep_map,
252261
a.phase = AG_INIT;
253262
}
254263
copy_complete_count = 0;
255-
init_team(use_team_ep_map, use_ep_range);
264+
init_team(use_team_ep_map, use_ep_range, is_onesided);
256265
// test_allgather(128);
257266
}
258267

@@ -293,7 +302,6 @@ UccJob::UccJob(int _n_procs, ucc_job_ctx_mode_t _ctx_mode, ucc_job_env_t vars) :
293302
/*restore original env */
294303
setenv(v.first.c_str(), v.second.c_str(), 1);
295304
}
296-
297305
}
298306

299307
void thread_allgather(void *src_buf, void *recv_buf, size_t size,
@@ -391,13 +399,61 @@ void proc_context_create(UccProcess_h proc, int id, ThreadAllgather *ta, bool is
391399
throw std::runtime_error(err_msg.str());
392400
}
393401

402+
/**
 * Creates the UCC context of one test process with both an OOB allgather and
 * a set of registered memory segments (UCC_CONTEXT_PARAM_FIELD_MEM_PARAMS).
 *
 * The function allocates UCC_TEST_N_MEM_SEGMENTS buffers of
 * UCC_TEST_MEM_SEGMENT_SIZE bytes each, stores them in proc->onesided_buf,
 * describes them in a local ucc_mem_map_t array and passes that array to
 * ucc_context_create() through proc->ctx_params.mem_params.
 *
 * @param proc  Process object whose lib_h is read and whose onesided_buf,
 *              ctx_params and ctx_h are filled in.
 * @param id    Index of this process: used as the OOB endpoint and to select
 *              ta->reqs[id] as the OOB coll_info.
 * @param ta    Thread allgather state; ta->n_procs is used as the number of
 *              OOB endpoints.
 *
 * @throws std::runtime_error if ucc_context_config_read() or
 *         ucc_context_create() does not return UCC_OK. The message carries
 *         the failing call, ucc_status_string(status) and the numeric status.
 *
 * NOTE(review): UccJob::create_context() runs this function as a std::thread
 * entry point, so an exception leaving it is not catchable by the caller
 * and ends in std::terminate - confirm this is the intended failure mode.
 */
void proc_context_create_mem_params(UccProcess_h proc, int id,
403+
ThreadAllgather *ta)
404+
{
405+
ucc_status_t status;
406+
ucc_context_config_h ctx_config;
407+
std::stringstream err_msg;
408+
// Segment descriptors live on this function's stack only.
ucc_mem_map_t map[UCC_TEST_N_MEM_SEGMENTS];
409+
410+
status = ucc_context_config_read(proc->lib_h, NULL, &ctx_config);
411+
if (status != UCC_OK) {
412+
err_msg << "ucc_context_config_read failed";
413+
goto exit_err;
414+
}
415+
// Allocate every segment and record its address/length in map[].
// The buffers are released in UccProcess::~UccProcess() when the
// MEM_PARAMS bit is set in ctx_params.mask.
for (auto i = 0; i < UCC_TEST_N_MEM_SEGMENTS; i++) {
416+
// presumably zero-filled (calloc-style helper) - TODO confirm
proc->onesided_buf[i] =
417+
ucc_calloc(UCC_TEST_MEM_SEGMENT_SIZE, 1, "onesided_buffer");
418+
// NOTE(review): EXPECT_NE is a non-fatal check, so on allocation failure
// execution continues and a null address is still placed in map[i].
EXPECT_NE(proc->onesided_buf[i], nullptr);
419+
map[i].address = proc->onesided_buf[i];
420+
map[i].len = UCC_TEST_MEM_SEGMENT_SIZE;
421+
}
422+
// The mask is overwritten (not OR-ed) first, then MEM_PARAMS is added.
proc->ctx_params.mask = UCC_CONTEXT_PARAM_FIELD_OOB;
423+
proc->ctx_params.mask |= UCC_CONTEXT_PARAM_FIELD_MEM_PARAMS;
424+
// OOB exchange is served by the thread_allgather_* callbacks.
proc->ctx_params.oob.allgather = thread_allgather_start;
425+
proc->ctx_params.oob.req_test = thread_allgather_req_test;
426+
proc->ctx_params.oob.req_free = thread_allgather_req_free;
427+
proc->ctx_params.oob.coll_info = (void *)&ta->reqs[id];
428+
proc->ctx_params.oob.n_oob_eps = ta->n_procs;
429+
proc->ctx_params.oob.oob_ep = id;
430+
// NOTE(review): proc->ctx_params outlives this call while map does not, so
// mem_params.segments is left pointing at a dead stack array after return.
// Harmless only if nothing dereferences it later - verify.
proc->ctx_params.mem_params.segments = map;
431+
proc->ctx_params.mem_params.n_segments = UCC_TEST_N_MEM_SEGMENTS;
432+
status = ucc_context_create(proc->lib_h, &proc->ctx_params, ctx_config,
433+
&proc->ctx_h);
434+
// The config is released before the status check, i.e. on success and on
// failure alike.
ucc_context_config_release(ctx_config);
435+
if (status != UCC_OK) {
436+
err_msg << "ucc_context_create for one-sided context failed";
437+
goto exit_err;
438+
}
439+
return;
440+
441+
// Common failure path: append the status text and code, then throw.
exit_err:
442+
err_msg << ": " << ucc_status_string(status) << " (" << status << ")";
443+
throw std::runtime_error(err_msg.str());
444+
}
394445

395446
void UccJob::create_context()
396447
{
397448
std::vector<std::thread> workers;
398449
for (auto i = 0; i < procs.size(); i++) {
399-
workers.push_back(std::thread(proc_context_create, procs[i], i, &ta,
400-
ctx_mode == UCC_JOB_CTX_GLOBAL));
450+
if (ctx_mode == UCC_JOB_CTX_GLOBAL_ONESIDED) {
451+
workers.push_back(
452+
std::thread(proc_context_create_mem_params, procs[i], i, &ta));
453+
} else {
454+
workers.push_back(std::thread(proc_context_create, procs[i], i, &ta,
455+
ctx_mode == UCC_JOB_CTX_GLOBAL));
456+
}
401457
}
402458
for (auto i = 0; i < procs.size(); i++) {
403459
workers[i].join();
@@ -464,28 +520,29 @@ void UccJob::cleanup()
464520
}
465521

466522
UccTeam_h UccJob::create_team(int _n_procs, bool use_team_ep_map,
467-
bool use_ep_range)
523+
bool use_ep_range, bool is_onesided)
468524
{
469525
EXPECT_GE(n_procs, _n_procs);
470526
std::vector<UccProcess_h> team_procs;
471-
for (int i=0; i<_n_procs; i++) {
527+
for (int i = 0; i < _n_procs; i++) {
472528
team_procs.push_back(procs[i]);
473529
}
474-
return std::make_shared<UccTeam>(team_procs, use_team_ep_map, use_ep_range);
530+
return std::make_shared<UccTeam>(team_procs, use_team_ep_map, use_ep_range,
531+
is_onesided);
475532
}
476533

477534
UccTeam_h UccJob::create_team(std::vector<int> &ranks, bool use_team_ep_map,
478-
bool use_ep_range)
535+
bool use_ep_range, bool is_onesided)
479536
{
480537
EXPECT_GE(n_procs, ranks.size());
481538
std::vector<UccProcess_h> team_procs;
482-
for (int i=0; i<ranks.size(); i++) {
539+
for (int i = 0; i < ranks.size(); i++) {
483540
team_procs.push_back(procs[ranks[i]]);
484541
}
485-
return std::make_shared<UccTeam>(team_procs, use_team_ep_map, use_ep_range);
542+
return std::make_shared<UccTeam>(team_procs, use_team_ep_map, use_ep_range,
543+
is_onesided);
486544
}
487545

488-
489546
UccReq::UccReq(UccTeam_h _team, ucc_coll_args_t *args) :
490547
team(_team)
491548
{

test/gtest/common/test_ucc.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ class UccProcess {
129129
};
130130
ucc_lib_h lib_h;
131131
ucc_context_h ctx_h;
132+
void * onesided_buf[3];
132133
int job_rank;
133134
UccProcess(int _job_rank,
134135
const ucc_lib_params_t &lp = default_lib_params,
@@ -163,7 +164,7 @@ class UccTeam {
163164
UccTeam *self;
164165
} allgather_coll_info_t;
165166
std::vector<struct allgather_data> ag;
166-
void init_team(bool use_team_ep_map, bool use_ep_range);
167+
void init_team(bool use_team_ep_map, bool use_ep_range, bool is_onesided);
167168
void destroy_team();
168169
void test_allgather(size_t msglen);
169170
static ucc_status_t allgather(void *src_buf, void *recv_buf, size_t size,
@@ -176,7 +177,7 @@ class UccTeam {
176177
void progress();
177178
std::vector<proc> procs;
178179
UccTeam(std::vector<UccProcess_h> &_procs, bool use_team_ep_map = false,
179-
bool use_ep_range = true);
180+
bool use_ep_range = true, bool is_onesided = false);
180181
~UccTeam();
181182
};
182183
typedef std::shared_ptr<UccTeam> UccTeam_h;
@@ -191,7 +192,8 @@ class UccJob {
191192
public:
192193
typedef enum {
193194
UCC_JOB_CTX_LOCAL,
194-
UCC_JOB_CTX_GLOBAL /*< ucc ctx create with OOB */
195+
UCC_JOB_CTX_GLOBAL, /*< ucc ctx create with OOB */
196+
UCC_JOB_CTX_GLOBAL_ONESIDED
195197
} ucc_job_ctx_mode_t;
196198
static const int nStaticTeams = 3;
197199
static const int staticUccJobSize = 16;
@@ -205,9 +207,9 @@ class UccJob {
205207
~UccJob();
206208
std::vector<UccProcess_h> procs;
207209
UccTeam_h create_team(int n_procs, bool use_team_ep_map = false,
208-
bool use_ep_range = true);
210+
bool use_ep_range = true, bool is_onesided = false);
209211
UccTeam_h create_team(std::vector<int> &ranks, bool use_team_ep_map = false,
210-
bool use_ep_range = true);
212+
bool use_ep_range = true, bool is_onesided = false);
211213
void create_context();
212214
ucc_job_ctx_mode_t ctx_mode;
213215
};
@@ -241,4 +243,7 @@ void clear_buffer(void *_buf, size_t size, ucc_memory_type_t mt, uint8_t value);
241243
UCC_DT_UINT8, UCC_DT_UINT16, UCC_DT_UINT32, UCC_DT_UINT64, UCC_DT_UINT128,\
242244
UCC_DT_FLOAT16, UCC_DT_FLOAT32, UCC_DT_FLOAT64)
243245

246+
#define UCC_TEST_N_MEM_SEGMENTS 3
247+
#define UCC_TEST_MEM_SEGMENT_SIZE (1 << 20)
248+
244249
#endif

0 commit comments

Comments
 (0)