Skip to content

Commit 2514b6e

Browse files
authored
Merge pull request #13037 from hjelmn/clean_up_btl_sm_fbox_code_and_fix_edge_condition_that_can_cause_lost_messages
btl/sm: rewrite of fast box (per-peer receive buffers)
2 parents: b072940 + 95f7141; commit: 2514b6e

File tree

4 files changed

+225
-200
lines changed

4 files changed

+225
-200
lines changed

opal/mca/btl/sm/btl_sm_component.c

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
2222
* Copyright (c) 2018 Triad National Security, LLC. All rights
2323
* reserved.
24-
* Copyright (c) 2019-2021 Google, Inc. All rights reserved.
24+
* Copyright (c) 2019-2025 Google, Inc. All rights reserved.
2525
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
2626
* Copyright (c) 2022 IBM Corporation. All rights reserved.
2727
* Copyright (c) 2022 Computer Architecture and VLSI Systems (CARV)
@@ -36,6 +36,7 @@
3636

3737
#include "opal/mca/btl/base/btl_base_error.h"
3838
#include "opal/mca/threads/mutex.h"
39+
#include "opal/util/bit_ops.h"
3940
#include "opal/util/output.h"
4041
#include "opal/util/printf.h"
4142

@@ -182,7 +183,7 @@ static int mca_btl_sm_component_register(void)
182183

183184
mca_btl_sm_component.fbox_size = 4096;
184185
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "fbox_size",
185-
"Size of per-peer fast transfer buffers (default: 4k)",
186+
"Size of per-peer fast transfer buffers. Must be a power of two (default: 4k)",
186187
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
187188
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
188189
MCA_BASE_VAR_SCOPE_LOCAL,
@@ -324,8 +325,10 @@ mca_btl_sm_component_init(int *num_btls, bool enable_progress_threads, bool enab
324325
component->segment_size = (2 << 20);
325326
}
326327

327-
component->fbox_size = (component->fbox_size + MCA_BTL_SM_FBOX_ALIGNMENT_MASK)
328-
& ~MCA_BTL_SM_FBOX_ALIGNMENT_MASK;
328+
if (component->fbox_size & (component->fbox_size - 1)) {
329+
BTL_VERBOSE(("fast box size must be a power of two, rounding up to next power of two."));
330+
component->fbox_size = opal_next_poweroftwo_inclusive(component->fbox_size);
331+
}
329332

330333
if (component->segment_size > (1ul << MCA_BTL_SM_OFFSET_BITS)) {
331334
component->segment_size = 2ul << MCA_BTL_SM_OFFSET_BITS;

0 commit comments

Comments
 (0)