Skip to content

Commit f62d26d

Browse files
committed
btl/vader: use basic mpool type to handle frag/fbox allocation
This commit updates btl/vader to use an mpool for handling all shared memory allocations (frags, fboxes).

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
1 parent 6ffc7cc commit f62d26d

File tree

6 files changed

+61
-56
lines changed

6 files changed

+61
-56
lines changed

opal/mca/btl/vader/btl_vader.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
* Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
18+
* Copyright (c) 2018 Triad National Security, LLC. All rights
19+
* reserved.
1820
*
1921
* $COPYRIGHT$
2022
*
@@ -53,6 +55,7 @@
5355
#include "opal/mca/rcache/rcache.h"
5456
#include "opal/mca/rcache/base/base.h"
5557
#include "opal/mca/btl/base/btl_base_error.h"
58+
#include "opal/mca/mpool/base/base.h"
5659
#include "opal/util/proc.h"
5760
#include "btl_vader_endpoint.h"
5861

@@ -112,16 +115,15 @@ struct mca_btl_vader_component_t {
112115
opal_mutex_t lock; /**< lock to protect concurrent updates to this structure's members */
113116
char *my_segment; /**< this rank's base pointer */
114117
size_t segment_size; /**< size of my_segment */
115-
size_t segment_offset; /**< start of unused portion of my_segment */
116118
int32_t num_smp_procs; /**< current number of smp procs on this host */
117119
opal_free_list_t vader_frags_eager; /**< free list of vader send frags */
118120
opal_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */
119121
opal_free_list_t vader_frags_user; /**< free list of small inline frags */
122+
opal_free_list_t vader_fboxes; /**< free list of available fast-boxes */
120123

121124
unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */
122125
unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */
123126
unsigned int fbox_size; /**< size of each peer fast box allocation */
124-
unsigned int fbox_count; /**< number of send fast boxes allocated */
125127

126128
int single_copy_mechanism; /**< single copy mechanism to use */
127129

@@ -143,6 +145,7 @@ struct mca_btl_vader_component_t {
143145
#if OPAL_BTL_VADER_HAVE_KNEM
144146
unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */
145147
#endif
148+
mca_mpool_base_module_t *mpool;
146149
};
147150
typedef struct mca_btl_vader_component_t mca_btl_vader_component_t;
148151
OPAL_MODULE_DECLSPEC extern mca_btl_vader_component_t mca_btl_vader_component;

opal/mca/btl/vader/btl_vader_component.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
* Copyright (c) 2014-2018 Research Organization for Information Science
2020
* and Technology (RIST). All rights reserved.
2121
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
22+
* Copyright (c) 2018 Triad National Security, LLC. All rights
23+
* reserved.
2224
* $COPYRIGHT$
2325
*
2426
* Additional copyrights may follow
@@ -303,6 +305,7 @@ static int mca_btl_vader_component_open(void)
303305
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t);
304306
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t);
305307
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t);
308+
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t);
306309
OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t);
307310
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t);
308311
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t);
@@ -323,6 +326,7 @@ static int mca_btl_vader_component_close(void)
323326
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
324327
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
325328
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send);
329+
OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes);
326330
OBJ_DESTRUCT(&mca_btl_vader_component.lock);
327331
OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints);
328332
OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments);
@@ -523,7 +527,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
523527

524528
/* no fast boxes allocated initially */
525529
component->num_fbox_in_endpoints = 0;
526-
component->fbox_count = 0;
527530

528531
mca_btl_vader_check_single_copy ();
529532

@@ -564,8 +567,6 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
564567
}
565568
}
566569

567-
component->segment_offset = 0;
568-
569570
/* initialize my fifo */
570571
vader_fifo_init ((struct vader_fifo_t *) component->my_segment);
571572

opal/mca/btl/vader/btl_vader_endpoint.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
1414
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
1515
* reserved.
16+
* Copyright (c) 2018 Triad National Security, LLC. All rights
17+
* reserved.
1618
* $COPYRIGHT$
1719
*
1820
* Additional copyrights may follow
@@ -58,6 +60,7 @@ typedef struct mca_btl_base_endpoint_t {
5860
uint32_t *startp; /**< pointer to location storing start offset */
5961
unsigned int start, end;
6062
uint16_t seq;
63+
opal_free_list_item_t *fbox; /**< fast-box free list item */
6164
} fbox_out;
6265

6366
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
@@ -101,13 +104,16 @@ static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_e
101104
endpoint->fbox_in.buffer = base;
102105
}
103106

104-
static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base)
107+
static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, opal_free_list_item_t *fbox)
105108
{
109+
void *base = fbox->ptr;
110+
106111
endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT;
107112
endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT;
108113
endpoint->fbox_out.startp = (uint32_t *) base;
109114
endpoint->fbox_out.startp[0] = MCA_BTL_VADER_FBOX_ALIGNMENT;
110115
endpoint->fbox_out.seq = 0;
116+
endpoint->fbox_out.fbox = fbox;
111117

112118
/* zero out the first header in the fast box */
113119
memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT);

opal/mca/btl/vader/btl_vader_fbox.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
/*
33
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
44
* reserved.
5+
* Copyright (c) 2018 Triad National Security, LLC. All rights
6+
* reserved.
57
* $COPYRIGHT$
68
*
79
* Additional copyrights may follow
@@ -259,20 +261,17 @@ static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mc
259261
/* serialize fast-box allocation from mca_btl_vader_component.vader_fboxes */
260262
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
261263

262-
if (mca_btl_vader_component.segment_size >= mca_btl_vader_component.segment_offset + mca_btl_vader_component.fbox_size &&
263-
mca_btl_vader_component.fbox_max > mca_btl_vader_component.fbox_count) {
264-
/* verify the remote side will accept another fbox */
265-
if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
266-
void *fbox_base = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
267-
mca_btl_vader_component.segment_offset += mca_btl_vader_component.fbox_size;
264+
/* verify the remote side will accept another fbox */
265+
if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
266+
opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes);
268267

268+
if (NULL != fbox) {
269269
/* zero out the fast box */
270-
memset (fbox_base, 0, mca_btl_vader_component.fbox_size);
271-
mca_btl_vader_endpoint_setup_fbox_send (ep, fbox_base);
270+
memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size);
271+
mca_btl_vader_endpoint_setup_fbox_send (ep, fbox);
272272

273273
hdr->flags |= MCA_BTL_VADER_FLAG_SETUP_FBOX;
274274
hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
275-
++mca_btl_vader_component.fbox_count;
276275
} else {
277276
opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
278277
}

opal/mca/btl/vader/btl_vader_frag.c

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
16+
* Copyright (c) 2018 Triad National Security, LLC. All rights
17+
* reserved.
1618
* $COPYRIGHT$
1719
*
1820
* Additional copyrights may follow
@@ -41,38 +43,9 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
4143
int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx)
4244
{
4345
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) item;
44-
unsigned int data_size = (unsigned int)(uintptr_t) ctx;
45-
unsigned int frag_size = data_size + sizeof (mca_btl_vader_hdr_t);
46-
47-
/* ensure next fragment is aligned on a cache line */
48-
frag_size = (frag_size + 63) & ~63;
49-
50-
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
51-
52-
if (data_size && mca_btl_vader_component.segment_size < mca_btl_vader_component.segment_offset + frag_size) {
53-
OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
54-
return OPAL_ERR_OUT_OF_RESOURCE;
55-
}
56-
57-
/* Set the list element here so we don't have to set it on the critical path. This only
58-
* works if each free list has its own unique fragment size and ALL free lists are initialized
59-
* with opal_free_list_init. */
60-
if (mca_btl_vader_component.max_inline_send == data_size) {
61-
frag->my_list = &mca_btl_vader_component.vader_frags_user;
62-
} else if (mca_btl_vader.super.btl_eager_limit == data_size) {
63-
frag->my_list = &mca_btl_vader_component.vader_frags_eager;
64-
} else if (mca_btl_vader.super.btl_max_send_size == data_size) {
65-
frag->my_list = &mca_btl_vader_component.vader_frags_max_send;
66-
}
67-
68-
if (data_size) {
69-
item->ptr = mca_btl_vader_component.my_segment + mca_btl_vader_component.segment_offset;
70-
mca_btl_vader_component.segment_offset += frag_size;
71-
}
72-
73-
OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
7446

75-
mca_btl_vader_frag_constructor ((mca_btl_vader_frag_t *) item);
47+
/* Set the list element here so we don't have to set it on the critical path */
48+
frag->my_list = (opal_free_list_t *) ctx;
7649

7750
return OPAL_SUCCESS;
7851
}

opal/mca/btl/vader/btl_vader_module.c

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -97,19 +97,32 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
9797
return OPAL_ERR_OUT_OF_RESOURCE;
9898
}
9999

100-
component->segment_offset = MCA_BTL_VADER_FIFO_SIZE;
100+
component->mpool = mca_mpool_basic_create ((void *) (component->my_segment + MCA_BTL_VADER_FIFO_SIZE),
101+
(unsigned long) (mca_btl_vader_component.segment_size - MCA_BTL_VADER_FIFO_SIZE), 64);
102+
if (NULL == component->mpool) {
103+
free (component->endpoints);
104+
return OPAL_ERR_OUT_OF_RESOURCE;
105+
}
106+
107+
rc = opal_free_list_init (&component->vader_fboxes, sizeof (opal_free_list_item_t), 8,
108+
OBJ_CLASS(opal_free_list_item_t), mca_btl_vader_component.fbox_size,
109+
opal_cache_line_size, 0, mca_btl_vader_component.fbox_max, 4,
110+
component->mpool, 0, NULL, NULL, NULL);
111+
if (OPAL_SUCCESS != rc) {
112+
return rc;
113+
}
101114

102115
/* initialize fragment descriptor free lists */
103116
/* initialize free list for small send and inline fragments */
104117
rc = opal_free_list_init (&component->vader_frags_user,
105118
sizeof(mca_btl_vader_frag_t),
106119
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
107-
0, opal_cache_line_size,
120+
mca_btl_vader_component.max_inline_send, opal_cache_line_size,
108121
component->vader_free_list_num,
109122
component->vader_free_list_max,
110123
component->vader_free_list_inc,
111-
NULL, 0, NULL, mca_btl_vader_frag_init,
112-
(void *)(intptr_t) mca_btl_vader_component.max_inline_send);
124+
component->mpool, 0, NULL, mca_btl_vader_frag_init,
125+
&component->vader_frags_user);
113126
if (OPAL_SUCCESS != rc) {
114127
return rc;
115128
}
@@ -118,12 +131,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
118131
rc = opal_free_list_init (&component->vader_frags_eager,
119132
sizeof (mca_btl_vader_frag_t),
120133
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
121-
0, opal_cache_line_size,
134+
mca_btl_vader.super.btl_eager_limit, opal_cache_line_size,
122135
component->vader_free_list_num,
123136
component->vader_free_list_max,
124137
component->vader_free_list_inc,
125-
NULL, 0, NULL, mca_btl_vader_frag_init,
126-
(void *)(intptr_t) mca_btl_vader.super.btl_eager_limit);
138+
component->mpool, 0, NULL, mca_btl_vader_frag_init,
139+
&component->vader_frags_eager);
127140
if (OPAL_SUCCESS != rc) {
128141
return rc;
129142
}
@@ -133,12 +146,12 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
133146
rc = opal_free_list_init (&component->vader_frags_max_send,
134147
sizeof (mca_btl_vader_frag_t),
135148
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
136-
0, opal_cache_line_size,
149+
mca_btl_vader.super.btl_max_send_size, opal_cache_line_size,
137150
component->vader_free_list_num,
138151
component->vader_free_list_max,
139152
component->vader_free_list_inc,
140-
NULL, 0, NULL, mca_btl_vader_frag_init,
141-
(void *)(intptr_t) mca_btl_vader.super.btl_max_send_size);
153+
component->mpool, 0, NULL, mca_btl_vader_frag_init,
154+
&component->vader_frags_max_send);
142155
if (OPAL_SUCCESS != rc) {
143156
return rc;
144157
}
@@ -367,6 +380,11 @@ static int vader_finalize(struct mca_btl_base_module_t *btl)
367380
}
368381
#endif
369382

383+
if (component->mpool) {
384+
component->mpool->mpool_finalize (component->mpool);
385+
component->mpool = NULL;
386+
}
387+
370388
return OPAL_SUCCESS;
371389
}
372390

@@ -536,6 +554,7 @@ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
536554
OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t);
537555
OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t);
538556
ep->fifo = NULL;
557+
ep->fbox_out.fbox = NULL;
539558
}
540559

541560
#if OPAL_BTL_VADER_HAVE_XPMEM
@@ -564,8 +583,12 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
564583
/* disconnect from the peer's segment */
565584
opal_shmem_segment_detach (&seg_ds);
566585
}
586+
if (ep->fbox_out.fbox) {
587+
opal_free_list_return (&mca_btl_vader_component.vader_fboxes, ep->fbox_out.fbox);
588+
}
567589

568590
ep->fbox_in.buffer = ep->fbox_out.buffer = NULL;
591+
ep->fbox_out.fbox = NULL;
569592
ep->segment_base = NULL;
570593
ep->fifo = NULL;
571594
}

0 commit comments

Comments
 (0)