
Commit d69d4c8

add zero parameter to primitive allocation to improve codegen for calloc etc

1 parent 9d69e3e

5 files changed: +77 −63 lines
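
Summary: the commit threads a zero flag through the internal allocation primitives (_mi_page_malloc, _mi_malloc_generic, and the new _mi_heap_malloc_zero) so that zero-initialization is decided on the allocation fast path itself, rather than by a separate _mi_block_zero_init pass after the fact. A hypothetical, simplified sketch of the idea (not mimalloc's actual code; names are illustrative):

/* When the page is known to be zero-filled, a zeroing allocation only has
   to clear the one word that served as the free-list link, instead of
   memset'ing the whole block. */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct block_s { struct block_s* next; } block_t;

typedef struct page_s {
  block_t* free;       /* intrusive free list threaded through the blocks */
  size_t   block_size; /* size of every block in this page */
  bool     is_zero;    /* page memory is still zero (fresh from the OS) */
} page_t;

static void* page_malloc(page_t* page, bool zero) {
  block_t* b = page->free;
  if (b == NULL) return NULL;  /* the real code falls back to a generic path */
  page->free = b->next;        /* pop from the free list */
  if (zero) {
    /* only the free-list link is dirty on an already-zero page */
    size_t zsize = page->is_zero ? sizeof(b->next) : page->block_size;
    memset(b, 0, zsize);
  }
  return b;
}

Without the flag, calloc-style allocation is malloc followed by a full memset, touching every cache line of the block even when the memory came from the OS already zeroed; hence the commit message's "improve codegen for calloc etc".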

include/mimalloc-internal.h

Lines changed: 28 additions & 7 deletions

@@ -106,7 +106,7 @@ void _mi_abandoned_await_readers(void);
 
 
 // "page.c"
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc;
 
 void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
 void _mi_page_unfull(mi_page_t* page);
@@ -138,12 +138,11 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
 mi_msecs_t _mi_clock_start(void);
 
 // "alloc.c"
-void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
+void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
 void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
 bool _mi_free_delayed_block(mi_block_t* block);
-void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
 
 #if MI_DEBUG>1
 bool _mi_page_is_valid(mi_page_t* page);
@@ -267,8 +266,8 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
 static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
   #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
   *total = count * size;
-  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
-          && size > 0 && (SIZE_MAX / size) < count);
+  // note: gcc/clang optimize this to directly check the overflow flag
+  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
 }
 #endif
 
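
The new comment refers to a known GCC/Clang optimization: the portable range-plus-division test is typically lowered to a single multiply followed by a direct check of the overflow flag. For comparison, the same check written with the compiler builtin (a hedged aside, not part of this commit; the #endif above suggests the edited code is the portable fallback branch, with a builtin variant behind another #if):

/* Not part of this commit: the equivalent overflow test via the GCC/Clang
   builtin, which typically compiles to one mul plus a branch on the
   overflow flag. */
#include <stdbool.h>
#include <stddef.h>

static inline bool mul_overflow_builtin(size_t count, size_t size, size_t* total) {
  return __builtin_mul_overflow(count, size, total);
}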

@@ -279,7 +278,7 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
     return false;
   }
   else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
-    #if !defined(NDEBUG)
+    #if MI_DEBUG > 0
     _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
     #endif
     *total = SIZE_MAX;
@@ -925,14 +924,25 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
     __movsb((unsigned char*)dst, (const unsigned char*)src, n);
   }
   else {
-    memcpy(dst, src, n); // todo: use noinline?
+    memcpy(dst, src, n);
+  }
+}
+static inline void _mi_memzero(void* dst, size_t n) {
+  if (_mi_cpu_has_fsrm) {
+    __stosb((unsigned char*)dst, 0, n);
+  }
+  else {
+    memset(dst, 0, n);
   }
 }
 #else
 #include <string.h>
 static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
   memcpy(dst, src, n);
 }
+static inline void _mi_memzero(void* dst, size_t n) {
+  memset(dst, 0, n);
+}
 #endif
 
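
__stosb is the MSVC intrinsic for rep stosb, mirroring the existing __movsb path in _mi_memcpy: on CPUs that advertise FSRM ("fast short REP MOVSB/STOSB") the string instruction is fast even for small sizes, so it can beat an out-of-line memset call. mimalloc's own probe sits behind _mi_cpu_has_fsrm, which is not shown in this diff; a hedged sketch of how that feature bit can be read with GCC/Clang (FSRM is documented as CPUID leaf 7, sub-leaf 0, EDX bit 4):

/* Hedged sketch, not mimalloc's code: detect FSRM on x86 with GCC/Clang. */
#include <stdbool.h>
#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h>
static bool cpu_has_fsrm(void) {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) return false;
  return (edx & (1u << 4)) != 0; /* FSRM: fast short REP MOVSB/STOSB */
}
#endif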

@@ -950,12 +960,23 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
   const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
   _mi_memcpy(adst, asrc, n);
 }
+
+static inline void _mi_memzero_aligned(void* dst, size_t n) {
+  mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
+  void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
+  _mi_memzero(adst, n);
+}
 #else
 // Default fallback on `_mi_memcpy`
 static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
   mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
   _mi_memcpy(dst, src, n);
 }
+
+static inline void _mi_memzero_aligned(void* dst, size_t n) {
+  mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
+  _mi_memzero(dst, n);
+}
 #endif

src/alloc-aligned.c

Lines changed: 1 addition & 2 deletions

@@ -79,10 +79,9 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
     #if MI_STAT>1
     mi_heap_stat_increase(heap, malloc, size);
     #endif
-    void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc
+    void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
     mi_assert_internal(p != NULL);
    mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
-    if (zero) { _mi_block_zero_init(page, p, size); }
     return p;
   }
 }
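
Note that the aligned fast path now forwards the zero request into _mi_page_malloc instead of patching the block afterwards with _mi_block_zero_init, so an aligned zero-allocation on an already-zero page no longer walks the whole block a second time.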

src/alloc.c

Lines changed: 35 additions & 50 deletions
@@ -25,22 +25,29 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // Fast allocation in a page: just pop from the free list.
 // Fall back to generic allocation only if the list is empty.
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
+extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept {
   mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
   mi_block_t* const block = page->free;
   if (mi_unlikely(block == NULL)) {
-    return _mi_malloc_generic(heap, size);
+    return _mi_malloc_generic(heap, size, zero);
   }
   mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
   // pop from the free list
   page->used++;
   page->free = mi_block_next(page, block);
   mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
 
+  // zero the block?
+  if (mi_unlikely(zero)) {
+    mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks
+    const size_t zsize = (mi_unlikely(page->is_zero) ? sizeof(block->next) : page->xblock_size);
+    _mi_memzero_aligned(block, zsize);
+  }
+
 #if (MI_DEBUG>0)
-  if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
+  if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, size); }
 #elif (MI_SECURE!=0)
-  block->next = 0; // don't leak internal data
+  if (!zero) { block->next = 0; } // don't leak internal data
 #endif
 
 #if (MI_STAT>0)
@@ -69,41 +76,45 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
   return block;
 }
 
-// allocate a small block
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
-  mi_assert(heap!=NULL);
+static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
+  mi_assert(heap != NULL);
   mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
   mi_assert(size <= MI_SMALL_SIZE_MAX);
-#if (MI_PADDING)
+  #if (MI_PADDING)
   if (size == 0) {
     size = sizeof(void*);
   }
-#endif
-  mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
-  void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
-  mi_assert_internal(p==NULL || mi_usable_size(p) >= size);
-#if MI_STAT>1
+  #endif
+  mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
+  void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
+  mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
+  #if MI_STAT>1
   if (p != NULL) {
     if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
     mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
   }
-#endif
+  #endif
   return p;
 }
 
+// allocate a small block
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+  return mi_heap_malloc_small_zero(heap, size, false);
+}
+
 mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept {
   return mi_heap_malloc_small(mi_get_default_heap(), size);
 }
 
 // The main allocation function
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+mi_decl_nodiscard extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
   if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
-    return mi_heap_malloc_small(heap, size);
+    return mi_heap_malloc_small_zero(heap, size, zero);
   }
   else {
     mi_assert(heap!=NULL);
-    mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
-    void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic
+    mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
+    void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic
     mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
     #if MI_STAT>1
     if (p != NULL) {
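
_mi_heap_malloc_zero is now the single dispatcher that both mi_heap_malloc (zero == false) and the zeroing allocators funnel through, for small and generic sizes alike. A hedged sketch of how a calloc-style front end composes with it, reusing the overflow check from mimalloc-internal.h (my_calloc is hypothetical; mimalloc's real mi_calloc is not part of this diff):

/* Illustrative only. The extern declarations below are simplified stand-ins
   for the mimalloc-internal declarations this commit touches. */
#include <stdbool.h>
#include <stddef.h>

typedef struct mi_heap_s mi_heap_t;
extern mi_heap_t* mi_get_default_heap(void);
extern bool       mi_count_size_overflow(size_t count, size_t size, size_t* total);
extern void*      _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);

static void* my_calloc(size_t count, size_t size) {
  size_t total;
  if (mi_count_size_overflow(count, size, &total)) return NULL;
  // the zero request rides the allocation path itself; no memset pass
  return _mi_heap_malloc_zero(mi_get_default_heap(), total, true);
}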
@@ -115,44 +126,17 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t*
   }
 }
 
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
-  return mi_heap_malloc(mi_get_default_heap(), size);
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+  return _mi_heap_malloc_zero(heap, size, false);
 }
 
-
-void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
-  // note: we need to initialize the whole usable block size to zero, not just the requested size,
-  // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
-  MI_UNUSED(size);
-  mi_assert_internal(p != NULL);
-  mi_assert_internal(mi_usable_size(p) >= size); // size can be zero
-  mi_assert_internal(_mi_ptr_page(p)==page);
-  if (page->is_zero && size > sizeof(mi_block_t)) {
-    // already zero initialized memory
-    ((mi_block_t*)p)->next = 0; // clear the free list pointer
-    mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p)));
-  }
-  else {
-    // otherwise memset
-    memset(p, 0, mi_usable_size(p));
-  }
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
+  return mi_heap_malloc(mi_get_default_heap(), size);
 }
 
 // zero initialized small block
 mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
-  void* p = mi_malloc_small(size);
-  if (p != NULL) {
-    _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again?
-  }
-  return p;
-}
-
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
-  void* p = mi_heap_malloc(heap,size);
-  if (zero && p != NULL) {
-    _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again?
-  }
-  return p;
+  return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true);
 }
 
 mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
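
mi_zalloc_small now passes zero = true straight into the small fast path, which also settles the deleted "todo: can we avoid getting the page again?": the page is already at hand, so the extra _mi_ptr_page lookup disappears. A quick behavioral check, using only mimalloc's public API (not part of this commit):

/* Build against mimalloc, e.g.: cc check_zalloc.c -lmimalloc
   Every zero-initialized allocation must read back as all zero bytes,
   whichever internal path (small, generic, or huge) served it. */
#include <mimalloc.h>
#include <assert.h>
#include <stddef.h>

int main(void) {
  for (size_t n = 8; n <= (size_t)8 * 1024 * 1024; n *= 16) {
    unsigned char* p = (unsigned char*)mi_zalloc(n);
    assert(p != NULL);
    for (size_t i = 0; i < n; i++) assert(p[i] == 0);
    mi_free(p);
  }
  return 0;
}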
@@ -564,6 +548,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
 #ifdef __cplusplus
 void* _mi_externs[] = {
   (void*)&_mi_page_malloc,
+  (void*)&_mi_heap_malloc_zero,
   (void*)&mi_malloc,
   (void*)&mi_malloc_small,
   (void*)&mi_zalloc_small,

src/page.c

Lines changed: 12 additions & 3 deletions

@@ -815,7 +815,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
 
 // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
 // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
 {
   mi_assert_internal(heap != NULL);
 
@@ -849,6 +849,15 @@
   mi_assert_internal(mi_page_immediate_available(page));
   mi_assert_internal(mi_page_block_size(page) >= size);
 
-  // and try again, this time succeeding! (i.e. this should never recurse)
-  return _mi_page_malloc(heap, page, size);
+  // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
+  if (mi_unlikely(zero && page->xblock_size == 0)) {
+    // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
+    void* p = _mi_page_malloc(heap, page, size, false);
+    mi_assert_internal(p != NULL);
+    _mi_memzero_aligned(p, mi_page_usable_block_size(page));
+    return p;
+  }
+  else {
+    return _mi_page_malloc(heap, page, size, zero);
+  }
 }
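
The special case exists because _mi_page_malloc derives its zeroing length from page->xblock_size, and the assertion added there (xblock_size != 0 when zero is requested) indicates that huge blocks store 0 in that field. For those, the generic path therefore allocates without zeroing and clears the full size afterwards via mi_page_usable_block_size(page).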

test/test-stress.c

Lines changed: 1 addition & 1 deletion (whitespace only)

@@ -256,7 +256,7 @@ int main(int argc, char** argv) {
   mi_collect(true);
 #endif
   mi_stats_print(NULL);
-#endif
+#endif
   //bench_end_program();
   return 0;
 }
