Skip to content

Commit 50f40a5

Browse files
authored
[libc] Fix internal alignment in allocator (#146738)
Summary: The allocator interface is supposed to have 16-byte alignment (to keep it consistent with the CPU allocator; we could probably drop this to 8 if desired). However, this was not enforced, because the number of bytes used for the bitfield sometimes resulted in an alignment of 8 instead of 16. Explicitly align the number of bitfield bytes to a multiple of 16, even if some of those bytes are unused.
1 parent e9be528 commit 50f40a5

File tree

3 files changed

+8
-5
lines changed

3 files changed

+8
-5
lines changed

libc/src/__support/GPU/allocator.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,9 @@ struct Slab {
189189

190190
// Get the number of bytes needed to contain the bitfield bits.
191191
constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) {
192-
return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8;
192+
return __builtin_align_up(
193+
((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8,
194+
MIN_ALIGNMENT + 1);
193195
}
194196

195197
// The actual amount of memory available excluding the bitfield and metadata.
@@ -584,7 +586,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
584586

585587
// If the requested alignment is less than what we already provide this is
586588
// just a normal allocation.
587-
if (alignment < MIN_ALIGNMENT + 1)
589+
if (alignment <= MIN_ALIGNMENT + 1)
588590
return gpu::allocate(size);
589591

590592
// We can't handle alignments greater than 2MiB so we simply fail.
@@ -594,7 +596,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
594596
// Trying to handle allocation internally would break the assumption that each
595597
// chunk is identical to each other. Allocate enough memory with worst-case
596598
// alignment and then round up. The index logic will round down properly.
597-
uint64_t rounded = size + alignment - 1;
599+
uint64_t rounded = size + alignment - MIN_ALIGNMENT;
598600
void *ptr = gpu::allocate(rounded);
599601
return __builtin_align_up(ptr, alignment);
600602
}

libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ TEST_MAIN(int, char **, char **) {
1010
// aligned_alloc with valid alignment and size
1111
void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
1212
EXPECT_NE(ptr, nullptr);
13-
EXPECT_EQ(__builtin_is_aligned(ptr, 32), 0U);
13+
EXPECT_TRUE(__builtin_is_aligned(ptr, 32));
1414

1515
LIBC_NAMESPACE::free(ptr);
1616

@@ -23,7 +23,7 @@ TEST_MAIN(int, char **, char **) {
2323
void *div =
2424
LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
2525
EXPECT_NE(div, nullptr);
26-
EXPECT_EQ(__builtin_is_aligned(div, alignment), 0U);
26+
EXPECT_TRUE(__builtin_is_aligned(div, alignment));
2727

2828
return 0;
2929
}

libc/test/integration/src/stdlib/gpu/malloc.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ TEST_MAIN(int, char **, char **) {
2424
int *divergent = reinterpret_cast<int *>(
2525
LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
2626
EXPECT_NE(divergent, nullptr);
27+
EXPECT_TRUE(__builtin_is_aligned(divergent, 16));
2728
*divergent = 1;
2829
EXPECT_EQ(*divergent, 1);
2930
LIBC_NAMESPACE::free(divergent);

0 commit comments

Comments
 (0)