@@ -698,90 +698,104 @@ static inline size_t _mi_os_numa_node_count(void) {
698
698
// -------------------------------------------------------------------
699
699
// Getting the thread id should be performant as it is called in the
700
700
// fast path of `_mi_free` and we specialize for various platforms.
701
+ // We only require _mi_thread_id() to return a unique id for each thread.
701
702
// -------------------------------------------------------------------
702
703
#if defined(_WIN32 )
704
+
703
705
#define WIN32_LEAN_AND_MEAN
704
706
#include <windows.h>
705
707
// Windows: the thread id is the address returned by NtCurrentTeb().
// This works on Intel and ARM in both 32- and 64-bit builds.
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  const uintptr_t teb_address = (uintptr_t)NtCurrentTeb();
  return teb_address;
}
709
711
710
- #elif defined( __GNUC__ ) && \
711
- (defined( __x86_64__ ) || defined( __i386__ ) || defined( __aarch64__ ))
712
-
712
+ // We use assembly for a fast thread id on the main platforms. The TLS layout depends on
713
+ // both the OS and libc implementation so we use specific tests for each main platform.
714
+ // If you test on another platform and it works please send a PR :-)
713
715
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
716
+ #elif defined(__GNUC__ ) && ( \
717
+ (defined(__GLIBC__ ) && (defined(__x86_64__ ) || defined(__i386__ ) || defined(__arm__ ) || defined(__aarch64__ ))) \
718
+ || (defined(__APPLE__ ) && (defined(__x86_64__ ) || defined(__aarch64__ ))) \
719
+ || (defined(__BIONIC__ ) && (defined(__x86_64__ ) || defined(__i386__ ) || defined(__arm__ ) || defined(__aarch64__ ))) \
720
+ || (defined(__FreeBSD__ ) && (defined(__x86_64__ ) || defined(__i386__ ) || defined(__aarch64__ ))) \
721
+ )
722
+
714
723
// Read entry `slot` of the calling thread's TLS area and return it.
// `slot` is an index in pointer-sized units. The register that addresses the
// TLS area differs per architecture and OS, hence the per-platform branches.
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
  void* entry;
  const size_t byte_ofs = (sizeof(void*) * slot);  // byte offset of the slot from the segment base
  #if defined(__i386__)
    __asm__("movl %%gs:%1, %0" : "=r" (entry) : "m" (*((void**)byte_ofs)) : );  // x86 32-bit always uses GS
  #elif defined(__APPLE__) && defined(__x86_64__)
    __asm__("movq %%gs:%1, %0" : "=r" (entry) : "m" (*((void**)byte_ofs)) : );  // x86_64 macOS uses GS
  #elif defined(__x86_64__) && (MI_INTPTR_SIZE == 4)
    __asm__("movl %%fs:%1, %0" : "=r" (entry) : "m" (*((void**)byte_ofs)) : );  // x32 ABI
  #elif defined(__x86_64__)
    __asm__("movq %%fs:%1, %0" : "=r" (entry) : "m" (*((void**)byte_ofs)) : );  // x86_64 Linux, BSD uses FS
  #elif defined(__arm__)
    // arm32: load the thread control block pointer and clear its low 2 bits
    void** tp; MI_UNUSED(byte_ofs);
    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tp));
    entry = *(tp + slot);
  #elif defined(__aarch64__)
    void** tp; MI_UNUSED(byte_ofs);
    #if defined(__APPLE__)
      // M1, issue #343: read tpidrro_el0 and clear the lower 3 bits
      __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tp));
    #else
      __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tp));
    #endif
    entry = *(tp + slot);
  #endif
  return entry;
}
741
749
742
- // setting a tls slot is only used on macOSX for now
750
+ // setting a tls slot is only used on macOS for now
743
751
// Store `value` into entry `slot` of the calling thread's TLS area.
// `slot` is an index in pointer-sized units. The platform branches mirror
// the ones used for reading a slot: same registers, same masking.
static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
  const size_t byte_ofs = (sizeof(void*) * slot);  // byte offset of the slot from the segment base
  #if defined(__i386__)
    __asm__("movl %1,%%gs:%0" : "=m" (*((void**)byte_ofs)) : "rn" (value) : );  // 32-bit always uses GS
  #elif defined(__APPLE__) && defined(__x86_64__)
    __asm__("movq %1,%%gs:%0" : "=m" (*((void**)byte_ofs)) : "rn" (value) : );  // x86_64 macOS uses GS
  #elif defined(__x86_64__) && (MI_INTPTR_SIZE == 4)
    __asm__("movl %1,%%fs:%0" : "=m" (*((void**)byte_ofs)) : "rn" (value) : );  // x32 ABI
  #elif defined(__x86_64__)
    __asm__("movq %1,%%fs:%0" : "=m" (*((void**)byte_ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
  #elif defined(__arm__)
    // arm32: load the thread control block pointer and clear its low 2 bits
    void** tp; MI_UNUSED(byte_ofs);
    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tp));
    *(tp + slot) = value;
  #elif defined(__aarch64__)
    void** tp; MI_UNUSED(byte_ofs);
    #if defined(__APPLE__)
      // M1, issue #343: read tpidrro_el0 and clear the lower 3 bits
      __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tp));
    #else
      __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tp));
    #endif
    *(tp + slot) = value;
  #endif
}
768
775
769
776
// Fast thread id: read the TLS slot that holds the thread id on this platform.
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  #if defined(__BIONIC__)
    // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
    // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
    const size_t tid_slot = 1;
  #else
    // in all our other targets, slot 0 is the thread id
    // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
    // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
    const size_t tid_slot = 0;
  #endif
  return (uintptr_t)mi_tls_slot(tid_slot);
}
788
+
778
789
#else
779
- // otherwise use portable C
790
+
791
+ // otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
780
792
// Portable fallback: the address of `_mi_heap_default` serves as the thread id.
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  const uintptr_t heap_default_address = (uintptr_t)&_mi_heap_default;
  return heap_default_address;
}
795
+
783
796
#endif
784
797
798
+
785
799
// -----------------------------------------------------------------------
786
800
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
787
801
// -----------------------------------------------------------------------
0 commit comments