Skip to content

Commit 94b5e91

Browse files
committed
OSHMEM: Add support for shmemx_malloc_with_hint()
- added multiple segments processing
- added shmemx_malloc_with_hint call + set of hints

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
1 parent dabad08 commit 94b5e91

27 files changed

+636
-86
lines changed

oshmem/include/pshmemx.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,11 @@
1616
extern "C" {
1717
#endif
1818

19+
/*
20+
* Symmetric heap routines
21+
*/
22+
OSHMEM_DECLSPEC void* pshmemx_malloc_with_hint(size_t size, long hint);
23+
1924

2025
/*
2126
* Legacy API

oshmem/include/shmemx.h

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,29 @@
1818
extern "C" {
1919
#endif
2020

21+
/*
 * Allocation hint flags.
 *
 * Every non-zero enumerator is a distinct single bit, so several of them can
 * be OR-ed together into one integer value.  Bits 0-4 and bits 16-18 form two
 * separate groups.
 * NOTE(review): presumably these are the values accepted by the `hint`
 * argument of shmemx_malloc_with_hint() declared further down -- confirm.
 */
enum {
22+
SHMEM_HINT_NONE = 0,
23+
SHMEM_HINT_LOW_LAT_MEM = 1 << 0,
24+
SHMEM_HINT_HIGH_BW_MEM = 1 << 1,
25+
SHMEM_HINT_NEAR_NIC_MEM = 1 << 2,
26+
SHMEM_HINT_DEVICE_GPU_MEM = 1 << 3,
27+
SHMEM_HINT_DEVICE_NIC_MEM = 1 << 4,
28+
29+
SHMEM_HINT_PSYNC = 1 << 16,
30+
SHMEM_HINT_PWORK = 1 << 17,
31+
SHMEM_HINT_ATOMICS = 1 << 18
32+
};
33+
2134
/*
2235
* All OpenSHMEM extension APIs that are not part of this specification must be defined in the shmemx.h include
2336
* file. These extensions shall use the shmemx_ prefix for all routine, variable, and constant names.
2437
*/
2538

39+
/*
40+
* Symmetric heap routines
41+
*/
42+
OSHMEM_DECLSPEC void* shmemx_malloc_with_hint(size_t size, long hint);
43+
2644
/*
2745
* Elemental put routines
2846
*/

oshmem/mca/memheap/base/base.h

Lines changed: 31 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -41,23 +41,27 @@ OSHMEM_DECLSPEC int mca_memheap_base_select(void);
4141
extern int mca_memheap_base_already_opened;
4242
extern int mca_memheap_base_key_exchange;
4343

44-
#define MCA_MEMHEAP_MAX_SEGMENTS 4
45-
#define HEAP_SEG_INDEX 0
46-
#define SYMB_SEG_INDEX 1
47-
#define MCA_MEMHEAP_SEG_COUNT (SYMB_SEG_INDEX+1)
44+
#define MCA_MEMHEAP_MAX_SEGMENTS 8
45+
#define HEAP_SEG_INDEX 0
4846

4947
#define MEMHEAP_SEG_INVALID 0xFFFF
5048

5149

50+
/*
 * Settings of the memheap base framework.
 *
 * device_nic_mem_seg_size is bound to the MCA variable
 * "device_nic_mem_seg_size" in mca_memheap_base_register(); when it is greater
 * than zero, _memheap_create() passes it as the size of an additional segment
 * requested with the SHMEM_HINT_DEVICE_NIC_MEM hint.
 */
typedef struct mca_memheap_base_config {
51+
long device_nic_mem_seg_size; /* Used for SHMEM_HINT_DEVICE_NIC_MEM */
52+
} mca_memheap_base_config_t;
53+
54+
5255
/*
 * Table of memory segments known to memheap.
 *
 * mem_segs has room for MCA_MEMHEAP_MAX_SEGMENTS entries.  n_segments is the
 * number of entries in use: the lookup helpers in this header iterate over
 * indices [0, n_segments), and mca_memheap_base_alloc_init() fills the entry
 * at index n_segments and increments the counter on success.
 * num_transports is not referenced in the visible part of this change.
 */
typedef struct mca_memheap_map {
5356
map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */
5457
int n_segments;
5558
int num_transports;
5659
} mca_memheap_map_t;
5760

5861
extern mca_memheap_map_t mca_memheap_base_map;
62+
extern mca_memheap_base_config_t mca_memheap_base_config;
5963

60-
int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t);
64+
int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t, long);
6165
void mca_memheap_base_alloc_exit(mca_memheap_map_t *);
6266
int mca_memheap_base_static_init(mca_memheap_map_t *);
6367
void mca_memheap_base_static_exit(mca_memheap_map_t *);
@@ -173,10 +177,12 @@ static inline int memheap_is_va_in_segment(void *va, int segno)
173177

174178
/*
 * Return the index of the segment that contains the address va.
 *
 * The added ('+') implementation walks the indices 0 .. n_segments-1 of
 * mca_memheap_base_map in order and returns the first index for which
 * memheap_is_va_in_segment() is true.  If no segment matches, the function
 * returns MEMHEAP_SEG_INVALID.
 *
 * The removed ('-') lines are the previous lookup, which tested only the two
 * fixed indices SYMB_SEG_INDEX and HEAP_SEG_INDEX.
 */
static inline int memheap_find_segnum(void *va)
175179
{
176-
if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) {
177-
return SYMB_SEG_INDEX;
178-
} else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
179-
return HEAP_SEG_INDEX;
180+
int i;
181+
182+
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
183+
if (memheap_is_va_in_segment(va, i)) {
184+
return i;
185+
}
180186
}
181187
/* no registered segment contains va */
return MEMHEAP_SEG_INVALID;
182188
}
@@ -193,18 +199,17 @@ static inline void *map_segment_va2rva(mkey_segment_t *seg, void *va)
193199
return memheap_va2rva(va, seg->super.va_base, seg->rva_base);
194200
}
195201

196-
static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs, size_t elem_size, void *va)
202+
static inline map_base_segment_t *map_segment_find_va(map_base_segment_t *segs,
203+
size_t elem_size, void *va)
197204
{
198205
map_base_segment_t *rseg;
206+
int i;
199207

200-
rseg = (map_base_segment_t *)((char *)segs + elem_size * HEAP_SEG_INDEX);
201-
if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) {
202-
return rseg;
203-
}
204-
205-
rseg = (map_base_segment_t *)((char *)segs + elem_size * SYMB_SEG_INDEX);
206-
if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) {
207-
return rseg;
208+
for (i = 0; i < MCA_MEMHEAP_MAX_SEGMENTS; i++) {
209+
rseg = (map_base_segment_t *)((char *)segs + elem_size * i);
210+
if (OPAL_LIKELY(map_segment_is_va_in(rseg, va))) {
211+
return rseg;
212+
}
208213
}
209214

210215
return NULL;
@@ -214,21 +219,14 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno)
214219

215220
static inline map_segment_t *memheap_find_va(void* va)
216221
{
217-
map_segment_t *s;
218-
219-
/* most probably there will be only two segments: heap and global data */
220-
if (OPAL_LIKELY(memheap_is_va_in_segment(va, SYMB_SEG_INDEX))) {
221-
s = &memheap_map->mem_segs[SYMB_SEG_INDEX];
222-
} else if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
223-
s = &memheap_map->mem_segs[HEAP_SEG_INDEX];
224-
} else if (memheap_map->n_segments - 2 > 0) {
225-
s = bsearch(va,
226-
&memheap_map->mem_segs[SYMB_SEG_INDEX+1],
227-
memheap_map->n_segments - 2,
228-
sizeof(*s),
229-
mca_memheap_seg_cmp);
230-
} else {
231-
s = NULL;
222+
map_segment_t *s = NULL;
223+
int i;
224+
225+
for (i = 0; i < memheap_map->n_segments; i++) {
226+
if (memheap_is_va_in_segment(va, i)) {
227+
s = &memheap_map->mem_segs[i];
228+
break;
229+
}
232230
}
233231

234232
#if MEMHEAP_BASE_DEBUG == 1

oshmem/mca/memheap/base/memheap_base_alloc.c

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,17 +19,21 @@
1919
#include "oshmem/mca/memheap/base/base.h"
2020

2121

22-
int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size)
22+
int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size, long hint)
2323
{
2424
int ret = OSHMEM_SUCCESS;
2525
char * seg_filename = NULL;
2626

2727
assert(map);
28-
assert(HEAP_SEG_INDEX == map->n_segments);
28+
if (hint == 0) {
29+
assert(HEAP_SEG_INDEX == map->n_segments);
30+
} else {
31+
assert(HEAP_SEG_INDEX < map->n_segments);
32+
}
2933

3034
map_segment_t *s = &map->mem_segs[map->n_segments];
3135
seg_filename = oshmem_get_unique_file_name(oshmem_my_proc_id());
32-
ret = mca_sshmem_segment_create(s, seg_filename, size);
36+
ret = mca_sshmem_segment_create(s, seg_filename, size, hint);
3337

3438
if (OSHMEM_SUCCESS == ret) {
3539
map->n_segments++;
@@ -45,12 +49,34 @@ int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size)
4549

4650
/*
 * Release the segments recorded in map.
 *
 * The added ('+') implementation does nothing for a NULL map.  Otherwise it
 * visits every entry in [0, map->n_segments) and, for each entry whose type is
 * not MAP_SEGMENT_STATIC, calls mca_sshmem_segment_detach() followed by
 * mca_sshmem_unlink().  Entries of type MAP_SEGMENT_STATIC are left untouched
 * here.  map->n_segments itself is not modified.
 *
 * The removed ('-') lines are the previous version, which handled only the
 * entry at HEAP_SEG_INDEX.
 */
void mca_memheap_base_alloc_exit(mca_memheap_map_t *map)
4751
{
48-
if (map) {
49-
map_segment_t *s = &map->mem_segs[HEAP_SEG_INDEX];
52+
int i;
53+
54+
if (!map) {
55+
return;
56+
}
57+
58+
for (i = 0; i < map->n_segments; ++i) {
59+
map_segment_t *s = &map->mem_segs[i];
60+
if (s->type != MAP_SEGMENT_STATIC) {
61+
mca_sshmem_segment_detach(s, NULL);
62+
mca_sshmem_unlink(s);
63+
}
64+
}
65+
}
5066

51-
assert(s);
67+
/*
 * Allocate `size` bytes according to `hint` and store the result through `ptr`.
 *
 * The registered segments are scanned in index order.  The first segment that
 * has its own allocator and whose alloc_hints share at least one bit with
 * `hint` serves the request; the value returned by that allocator's realloc()
 * is returned unchanged.  When no segment matches (which includes hint == 0)
 * the request is forwarded to the default memheap allocator through
 * MCA_MEMHEAP_CALL(alloc()).
 *
 * Review fix: the hint test used to be `hint && s->alloc_hints` (logical AND),
 * which matched ANY non-zero hint against ANY segment with non-zero
 * alloc_hints.  The SHMEM_HINT_* values are bit flags, so the two masks are
 * now intersected with bitwise `&`.
 */
int mca_memheap_alloc_with_hint(size_t size, long hint, void** ptr)
68+
{
69+
int i;
5270

53-
mca_sshmem_segment_detach(s, NULL);
54-
mca_sshmem_unlink(s);
71+
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
72+
map_segment_t *s = &mca_memheap_base_map.mem_segs[i];
73+
if (s->allocator && (hint & s->alloc_hints)) {
74+
/* Do not fall back to default allocator since it will break the
75+
* symmetry between PEs
76+
*/
77+
return s->allocator->realloc(s, size, NULL, ptr);
78+
}
5579
}
80+
81+
/* no hinted segment matched: use the default symmetric heap */
return MCA_MEMHEAP_CALL(alloc(size, ptr));
5682
}

oshmem/mca/memheap/base/memheap_base_frame.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,12 @@ static int mca_memheap_base_register(mca_base_register_flag_t flags)
5252
MCA_BASE_VAR_SCOPE_READONLY,
5353
&mca_memheap_base_key_exchange);
5454

55+
mca_base_var_register("oshmem", "memheap", "base", "device_nic_mem_seg_size",
56+
"Size of memory block used for allocations with hint SHMEM_HINT_DEVICE_NIC_MEM",
57+
MCA_BASE_VAR_TYPE_LONG, NULL, 0,
58+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
59+
MCA_BASE_VAR_SCOPE_LOCAL,
60+
&mca_memheap_base_config.device_nic_mem_seg_size);
5561

5662
return OSHMEM_SUCCESS;
5763
}

oshmem/mca/memheap/base/memheap_base_mkey.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,7 @@ void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno)
749749
{
750750
map_segment_t *s;
751751

752-
if (segno >= MCA_MEMHEAP_SEG_COUNT) {
752+
if (segno >= MCA_MEMHEAP_MAX_SEGMENTS) {
753753
return;
754754
}
755755

oshmem/mca/memheap/base/memheap_base_select.c

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,14 @@
2121
#include "oshmem/util/oshmem_util.h"
2222
#include "oshmem/mca/memheap/memheap.h"
2323
#include "oshmem/mca/memheap/base/base.h"
24+
#include "orte/mca/errmgr/errmgr.h"
25+
#include "oshmem/include/shmemx.h"
26+
#include "oshmem/mca/sshmem/base/base.h"
27+
28+
29+
/*
 * Definition of the framework-wide configuration object declared `extern` in
 * base.h.  The default size of 0 means the extra SHMEM_HINT_DEVICE_NIC_MEM
 * segment is not requested: _memheap_create() below only calls
 * mca_memheap_base_alloc_init() for it when the size is greater than zero.
 */
mca_memheap_base_config_t mca_memheap_base_config = {
30+
.device_nic_mem_seg_size = 0
31+
};
2432

2533
mca_memheap_base_module_t mca_memheap = {0};
2634

@@ -94,7 +102,7 @@ static memheap_context_t* _memheap_create(void)
94102
{
95103
int rc = OSHMEM_SUCCESS;
96104
static memheap_context_t context;
97-
size_t user_size;
105+
size_t user_size, size;
98106

99107
user_size = _memheap_size();
100108
if (user_size < MEMHEAP_BASE_MIN_SIZE) {
@@ -105,7 +113,18 @@ static memheap_context_t* _memheap_create(void)
105113
/* Inititialize symmetric area */
106114
if (OSHMEM_SUCCESS == rc) {
107115
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
108-
user_size + MEMHEAP_BASE_PRIVATE_SIZE);
116+
user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0);
117+
}
118+
119+
/* Initialize atomic symmetric area */
120+
size = mca_memheap_base_config.device_nic_mem_seg_size;
121+
if ((OSHMEM_SUCCESS == rc) && (size > 0)) {
122+
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map, size,
123+
SHMEM_HINT_DEVICE_NIC_MEM);
124+
if (rc == OSHMEM_ERR_NOT_IMPLEMENTED) {
125+
/* do not treat NOT_IMPLEMENTED as error */
126+
rc = OSHMEM_SUCCESS;
127+
}
109128
}
110129

111130
/* Inititialize static/global variables area */

oshmem/mca/memheap/base/memheap_base_static.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
4949
int ret = OSHMEM_SUCCESS;
5050

5151
assert(map);
52-
assert(SYMB_SEG_INDEX <= map->n_segments);
52+
assert(HEAP_SEG_INDEX < map->n_segments);
5353

5454
ret = _load_segments();
5555

oshmem/mca/memheap/memheap.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,8 @@ typedef struct mca_memheap_base_module_t mca_memheap_base_module_t;
138138

139139
OSHMEM_DECLSPEC extern mca_memheap_base_module_t mca_memheap;
140140

141+
int mca_memheap_alloc_with_hint(size_t size, long hint, void**);
142+
141143
static inline int mca_memheap_base_mkey_is_shm(sshmem_mkey_t *mkey)
142144
{
143145
return (0 == mkey->len) && (MAP_SEGMENT_SHM_INVALID != (int)mkey->u.key);

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
#include "oshmem/runtime/runtime.h"
3737

3838
#include "oshmem/mca/spml/ucx/spml_ucx_component.h"
39+
#include "oshmem/mca/sshmem/ucx/sshmem_ucx.h"
3940

4041
/* Turn ON/OFF debug output from build (default 0) */
4142
#ifndef SPML_UCX_PUT_DEBUG
@@ -267,7 +268,7 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
267268
OSHMEM_PROC_DATA(procs[i])->num_transports = 1;
268269
OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids;
269270

270-
for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
271+
for (j = 0; j < MCA_MEMHEAP_MAX_SEGMENTS; j++) {
271272
mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL;
272273
}
273274

@@ -438,7 +439,8 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
438439
}
439440

440441
} else {
441-
ucx_mkey->mem_h = (ucp_mem_h)mem_seg->context;
442+
mca_sshmem_ucx_segment_context_t *ctx = mem_seg->context;
443+
ucx_mkey->mem_h = ctx->ucp_memh;
442444
}
443445

444446
status = ucp_rkey_pack(mca_spml_ucx.ucp_context, ucx_mkey->mem_h,
@@ -589,17 +591,19 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
589591
goto error2;
590592
}
591593

592-
for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
594+
for (j = 0; j < memheap_map->n_segments; j++) {
593595
mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0];
594596
ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key;
595-
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
596-
mkey->u.data,
597-
&ucx_mkey->rkey);
598-
if (UCS_OK != err) {
599-
SPML_UCX_ERROR("failed to unpack rkey");
600-
goto error2;
597+
if (mkey->u.data) {
598+
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
599+
mkey->u.data,
600+
&ucx_mkey->rkey);
601+
if (UCS_OK != err) {
602+
SPML_UCX_ERROR("failed to unpack rkey");
603+
goto error2;
604+
}
605+
mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
601606
}
602-
mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
603607
}
604608
}
605609

@@ -747,6 +751,8 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
747751
ucs_status_t err;
748752
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
749753

754+
opal_atomic_wmb();
755+
750756
err = ucp_worker_fence(ucx_ctx->ucp_worker);
751757
if (UCS_OK != err) {
752758
SPML_UCX_ERROR("fence failed: %s", ucs_status_string(err));
@@ -761,6 +767,8 @@ int mca_spml_ucx_quiet(shmem_ctx_t ctx)
761767
int ret;
762768
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
763769

770+
opal_atomic_wmb();
771+
764772
ret = opal_common_ucx_worker_flush(ucx_ctx->ucp_worker);
765773
if (OMPI_SUCCESS != ret) {
766774
oshmem_shmem_abort(-1);

0 commit comments

Comments
 (0)