@@ -26,86 +26,101 @@ namespace __llvm_libc {
26
26
inline_memset_embedded_tiny (Ptr dst, uint8_t value, size_t count) {
27
27
LIBC_LOOP_NOUNROLL
28
28
for (size_t offset = 0 ; offset < count; ++offset)
29
- generic::Memset<1 , 1 >::block (dst + offset, value);
29
+ generic::Memset<uint8_t >::block (dst + offset, value);
30
30
}
31
31
32
32
// x86 strategy for memset: dispatches on `count` to a fixed set of
// generic::Memset store patterns (block / head_tail / loop_and_tail).
// Diff view: lines starting with '-' show the removed size-indexed form
// generic::Memset<Size, MaxSize>; lines starting with '+' show the replacement,
// which is indexed by store type and no longer takes a MaxSize template
// parameter.
// Parameters: `dst` destination, `value` fill byte, `count` number of bytes.
// NOTE(review): generic::Memset, align_to_next_boundary, Ptr and the
// uint8xN_t types are defined outside this view; remarks about their behavior
// below are assumptions to confirm against their definitions.
#if defined(LIBC_TARGET_ARCH_IS_X86)
33
- template <size_t MaxSize>
34
33
[[maybe_unused]] LIBC_INLINE static void
35
34
inline_memset_x86 (Ptr dst, uint8_t value, size_t count) {
35
// Choose what the local 128/256/512-bit store types are made of, based on the
// ISA macros the compiler defines: with AVX512F all three are uint8xN_t types;
// with AVX the 512-bit type becomes an array of two uint8x32_t; with SSE2 the
// 256/512-bit types become arrays of uint8x16_t; otherwise all three are arrays
// of uint64_t.
+ #if defined(__AVX512F__)
36
+ using uint128_t = uint8x16_t ;
37
+ using uint256_t = uint8x32_t ;
38
+ using uint512_t = uint8x64_t ;
39
+ #elif defined(__AVX__)
40
+ using uint128_t = uint8x16_t ;
41
+ using uint256_t = uint8x32_t ;
42
+ using uint512_t = cpp::array<uint8x32_t , 2 >;
43
+ #elif defined(__SSE2__)
44
+ using uint128_t = uint8x16_t ;
45
+ using uint256_t = cpp::array<uint8x16_t , 2 >;
46
+ using uint512_t = cpp::array<uint8x16_t , 4 >;
47
+ #else
48
+ using uint128_t = cpp::array<uint64_t , 2 >;
49
+ using uint256_t = cpp::array<uint64_t , 4 >;
50
+ using uint512_t = cpp::array<uint64_t , 8 >;
51
+ #endif
52
+
36
53
// Nothing to write.
if (count == 0 )
37
54
return ;
38
55
// Counts 1, 2 and 3 each map to a single fixed-size block; 3 bytes is
// expressed as a uint16_t followed by a uint8_t.
if (count == 1 )
39
- return generic::Memset<1 , MaxSize >::block (dst, value);
56
+ return generic::Memset<uint8_t >::block (dst, value);
40
57
if (count == 2 )
41
- return generic::Memset<2 , MaxSize >::block (dst, value);
58
+ return generic::Memset<uint16_t >::block (dst, value);
42
59
if (count == 3 )
43
- return generic::Memset<3 , MaxSize >::block (dst, value);
60
+ return generic::Memset<uint16_t , uint8_t >::block (dst, value);
44
61
// From here up to 128 bytes each range is handled by head_tail with a store
// type whose size is half the range's upper bound (4 bytes for <= 8, ...,
// 64 bytes for <= 128).
// NOTE(review): presumably head_tail writes one block at the start and one
// ending at dst + count, overlapping when count is below twice the block size
// - confirm against generic::Memset.
if (count <= 8 )
45
- return generic::Memset<4 , MaxSize >::head_tail (dst, value, count);
62
+ return generic::Memset<uint32_t >::head_tail (dst, value, count);
46
63
if (count <= 16 )
47
- return generic::Memset<8 , MaxSize >::head_tail (dst, value, count);
64
+ return generic::Memset<uint64_t >::head_tail (dst, value, count);
48
65
if (count <= 32 )
49
- return generic::Memset<16 , MaxSize >::head_tail (dst, value, count);
66
+ return generic::Memset<uint128_t >::head_tail (dst, value, count);
50
67
if (count <= 64 )
51
- return generic::Memset<32 , MaxSize >::head_tail (dst, value, count);
68
+ return generic::Memset<uint256_t >::head_tail (dst, value, count);
52
69
if (count <= 128 )
53
- return generic::Memset<64 , MaxSize >::head_tail (dst, value, count);
70
+ return generic::Memset<uint512_t >::head_tail (dst, value, count);
54
71
// Aligned loop
// Reached only when count > 128: one 32-byte block is written at the original
// dst before align_to_next_boundary<32> is called, then the remainder is
// handled by a 32-byte loop_and_tail.
// NOTE(review): presumably align_to_next_boundary advances dst to the next
// 32-byte boundary and shrinks count accordingly, the first block covering the
// skipped bytes - confirm.
55
- generic::Memset<32 , MaxSize >::block (dst, value);
72
+ generic::Memset<uint256_t >::block (dst, value);
56
73
align_to_next_boundary<32 >(dst, count);
57
- return generic::Memset<32 , MaxSize >::loop_and_tail (dst, value, count);
74
+ return generic::Memset<uint256_t >::loop_and_tail (dst, value, count);
58
75
}
59
76
#endif // defined(LIBC_TARGET_ARCH_IS_X86)
60
77
61
78
// AArch64 strategy for memset: dispatches on `count` to a fixed set of
// generic::Memset store patterns, with a separate path for large zero fills.
// Diff view: lines starting with '-' show the removed size-indexed form
// generic::Memset<Size, MaxSize>; lines starting with '+' show the replacement,
// which is indexed by store type and no longer takes a MaxSize template
// parameter.
// Parameters: `dst` destination, `value` fill byte, `count` number of bytes.
// NOTE(review): generic::Memset, aarch64::neon::BzeroCacheLine,
// aarch64::neon::hasZva, align_to_next_boundary, Ptr and the uint8xN_t types
// are defined outside this view; remarks about their behavior below are
// assumptions to confirm against their definitions.
#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
62
- template <size_t MaxSize>
63
79
[[maybe_unused]] LIBC_INLINE static void
64
80
inline_memset_aarch64 (Ptr dst, uint8_t value, size_t count) {
81
// Compilation fails unless aarch64::kNeon is true; the three local store types
// are then always the uint8xN_t types, with no array fallback.
+ static_assert (aarch64::kNeon , " aarch64 supports vector types" );
82
+ using uint128_t = uint8x16_t ;
83
+ using uint256_t = uint8x32_t ;
84
+ using uint512_t = uint8x64_t ;
65
85
// Nothing to write.
if (count == 0 )
66
86
return ;
67
87
// 1..3 bytes: always write one byte at dst; for 2 or 3 bytes add a 2-byte
// tail store.
// NOTE(review): presumably tail writes the block ending at dst + count, so
// the two stores together cover the range - confirm.
if (count <= 3 ) {
68
- generic::Memset<1 , MaxSize >::block (dst, value);
88
+ generic::Memset<uint8_t >::block (dst, value);
69
89
if (count > 1 )
70
- generic::Memset<2 , MaxSize >::tail (dst, value, count);
90
+ generic::Memset<uint16_t >::tail (dst, value, count);
71
91
return ;
72
92
}
73
93
// 4..32 bytes: head_tail with a store type whose size is half the range's
// upper bound (4 bytes for <= 8, 8 bytes for <= 16, 16 bytes for <= 32).
if (count <= 8 )
74
- return generic::Memset<4 , MaxSize >::head_tail (dst, value, count);
94
+ return generic::Memset<uint32_t >::head_tail (dst, value, count);
75
95
if (count <= 16 )
76
- return generic::Memset<8 , MaxSize >::head_tail (dst, value, count);
96
+ return generic::Memset<uint64_t >::head_tail (dst, value, count);
77
97
if (count <= 32 )
78
- return generic::Memset<16 , MaxSize >::head_tail (dst, value, count);
98
+ return generic::Memset<uint128_t >::head_tail (dst, value, count);
79
99
// 33..96 bytes: a 32-byte block at dst, then either just a 32-byte tail
// (count <= 64) or a second 32-byte block at dst + 32 followed by the tail.
if (count <= (32 + 64 )) {
80
- generic::Memset<32 , MaxSize >::block (dst, value);
100
+ generic::Memset<uint256_t >::block (dst, value);
81
101
if (count <= 64 )
82
- return generic::Memset<32 , MaxSize >::tail (dst, value, count);
83
- generic::Memset<32 , MaxSize >::block (dst + 32 , value);
84
- generic::Memset<32 , MaxSize >::tail (dst, value, count);
102
+ return generic::Memset<uint256_t >::tail (dst, value, count);
103
+ generic::Memset<uint256_t >::block (dst + 32 , value);
104
+ generic::Memset<uint256_t >::tail (dst, value, count);
85
105
return ;
86
106
}
87
107
// More than 96 bytes. The first branch is taken only for zero fills of at
// least 448 bytes when hasZva() returns true: a 64-byte block of zeros at the
// original dst, align_to_next_boundary<64>, then BzeroCacheLine::loop_and_tail.
// NOTE(review): hasZva/BzeroCacheLine presumably relate to a cache-line
// zeroing facility - confirm against their definitions.
if (count >= 448 && value == 0 && aarch64::neon::hasZva ()) {
88
- generic::Memset<64 , MaxSize >::block (dst, 0 );
108
+ generic::Memset<uint512_t >::block (dst, 0 );
89
109
align_to_next_boundary<64 >(dst, count);
90
- return aarch64::neon::BzeroCacheLine< 64 > ::loop_and_tail (dst, 0 , count);
110
+ return aarch64::neon::BzeroCacheLine::loop_and_tail (dst, 0 , count);
91
111
} else {
// General path: a 16-byte block at the original dst,
// align_to_next_boundary<16>, then a loop_and_tail using the 64-byte store
// type.
92
- generic::Memset<16 , MaxSize >::block (dst, value);
112
+ generic::Memset<uint128_t >::block (dst, value);
93
113
align_to_next_boundary<16 >(dst, count);
94
- return generic::Memset<64 , MaxSize >::loop_and_tail (dst, value, count);
114
+ return generic::Memset<uint512_t >::loop_and_tail (dst, value, count);
95
115
}
96
116
}
97
117
#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
98
118
99
119
LIBC_INLINE static void inline_memset (Ptr dst, uint8_t value, size_t count) {
100
120
#if defined(LIBC_TARGET_ARCH_IS_X86)
101
- static constexpr size_t kMaxSize = x86::kAvx512F ? 64
102
- : x86::kAvx ? 32
103
- : x86::kSse2 ? 16
104
- : 8 ;
105
- return inline_memset_x86<kMaxSize >(dst, value, count);
121
+ return inline_memset_x86 (dst, value, count);
106
122
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
107
- static constexpr size_t kMaxSize = aarch64::kNeon ? 16 : 8 ;
108
- return inline_memset_aarch64<kMaxSize >(dst, value, count);
123
+ return inline_memset_aarch64 (dst, value, count);
109
124
#else
110
125
return inline_memset_embedded_tiny (dst, value, count);
111
126
#endif
0 commit comments