Skip to content

Commit 43e5cd2

Browse files
committed
revise assembly test for the _mi_threadid(); fix issue #495 with musl libc on arm; test specifically for bionic libc on Android
1 parent 6ead284 commit 43e5cd2

File tree

1 file changed

+68
-54
lines changed

1 file changed

+68
-54
lines changed

include/mimalloc-internal.h

Lines changed: 68 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -698,90 +698,104 @@ static inline size_t _mi_os_numa_node_count(void) {
698698
// -------------------------------------------------------------------
699699
// Getting the thread id should be performant as it is called in the
700700
// fast path of `_mi_free` and we specialize for various platforms.
701+
// We only require _mi_thread_id() to return a unique id for each thread.
701702
// -------------------------------------------------------------------
702703
#if defined(_WIN32)
704+
703705
#define WIN32_LEAN_AND_MEAN
704706
#include <windows.h>
705707
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
706708
// Windows: works on Intel and ARM in both 32- and 64-bit
707709
return (uintptr_t)NtCurrentTeb();
708710
}
709711

710-
#elif defined(__GNUC__) && \
711-
(defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))
712-
712+
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
713+
// both the OS and libc implementation so we use specific tests for each main platform.
714+
// If you test on another platform and it works please send a PR :-)
713715
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
716+
#elif defined(__GNUC__) && ( \
717+
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
718+
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
719+
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
720+
|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
721+
)
722+
714723
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
715724
void* res;
716725
const size_t ofs = (slot*sizeof(void*));
717-
#if defined(__i386__)
718-
__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS
719-
#elif defined(__APPLE__) && defined(__x86_64__)
720-
__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
721-
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
722-
__asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
723-
#elif defined(__x86_64__)
724-
__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
725-
#elif defined(__arm__) // arm32: defined but currently not used (see issue #495)
726-
void** tcb; MI_UNUSED(ofs);
727-
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
728-
res = tcb[slot];
729-
#elif defined(__aarch64__)
730-
void** tcb; MI_UNUSED(ofs);
731-
#if defined(__APPLE__) // M1, issue #343
732-
__asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
733-
tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits
734-
#else
735-
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
726+
#if defined(__i386__)
727+
__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS
728+
#elif defined(__APPLE__) && defined(__x86_64__)
729+
__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
730+
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
731+
__asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
732+
#elif defined(__x86_64__)
733+
__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
734+
#elif defined(__arm__)
735+
void** tcb; MI_UNUSED(ofs);
736+
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
737+
res = tcb[slot];
738+
#elif defined(__aarch64__)
739+
void** tcb; MI_UNUSED(ofs);
740+
#if defined(__APPLE__) // M1, issue #343
741+
__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
742+
#else
743+
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
744+
#endif
745+
res = tcb[slot];
736746
#endif
737-
res = tcb[slot];
738-
#endif
739747
return res;
740748
}
741749

742-
// setting a tls slot is only used on macOSX for now
750+
// setting a tls slot is only used on macOS for now
743751
static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
744752
const size_t ofs = (slot*sizeof(void*));
745-
#if defined(__i386__)
746-
__asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
747-
#elif defined(__APPLE__) && defined(__x86_64__)
748-
__asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOSX uses GS
749-
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
750-
__asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
751-
#elif defined(__x86_64__)
752-
__asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
753-
#elif defined(__arm__)
754-
void** tcb; MI_UNUSED(ofs);
755-
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
756-
tcb[slot] = value;
757-
#elif defined(__aarch64__)
758-
void** tcb; MI_UNUSED(ofs);
759-
#if defined(__APPLE__) // M1, issue #343
760-
__asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
761-
tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits
762-
#else
763-
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
753+
#if defined(__i386__)
754+
__asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
755+
#elif defined(__APPLE__) && defined(__x86_64__)
756+
__asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS
757+
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
758+
__asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
759+
#elif defined(__x86_64__)
760+
__asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
761+
#elif defined(__arm__)
762+
void** tcb; MI_UNUSED(ofs);
763+
__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
764+
tcb[slot] = value;
765+
#elif defined(__aarch64__)
766+
void** tcb; MI_UNUSED(ofs);
767+
#if defined(__APPLE__) // M1, issue #343
768+
__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
769+
#else
770+
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
771+
#endif
772+
tcb[slot] = value;
764773
#endif
765-
tcb[slot] = value;
766-
#endif
767774
}
768775

769776
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
770-
#if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__))
771-
// issue #384, #495: on arm Android, slot 1 is the thread ID (pointer to pthread internal struct)
772-
return (uintptr_t)mi_tls_slot(1);
773-
#else
774-
// in all our other targets, slot 0 is the pointer to the thread control block
775-
return (uintptr_t)mi_tls_slot(0);
776-
#endif
777+
#if defined(__BIONIC__)
778+
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
779+
// see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
780+
return (uintptr_t)mi_tls_slot(1);
781+
#else
782+
// in all our other targets, slot 0 is the thread id
783+
// glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
784+
// apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
785+
return (uintptr_t)mi_tls_slot(0);
786+
#endif
777787
}
788+
778789
#else
779-
// otherwise use portable C
790+
791+
// otherwise use portable C, taking the address of a thread-local variable (this is still very fast on most platforms).
780792
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
781793
return (uintptr_t)&_mi_heap_default;
782794
}
795+
783796
#endif
784797

798+
785799
// -----------------------------------------------------------------------
786800
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
787801
// -----------------------------------------------------------------------

0 commit comments

Comments
 (0)