Skip to content

Commit 355a5d5

Browse files
committed
[libc][NFC] Use new approach based on types to code memmove
1 parent d827865 commit 355a5d5

File tree

2 files changed: 54 additions and 83 deletions

libc/src/string/memory_utils/memmove_implementations.h

Lines changed: 32 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -34,73 +34,54 @@ inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) {
3434
}
3535
}
3636

37-
template <size_t MaxSize>
38-
[[maybe_unused]] LIBC_INLINE void inline_memmove_generic(Ptr dst, CPtr src,
39-
size_t count) {
40-
if (count == 0)
41-
return;
42-
if (count == 1)
43-
return generic::Memmove<1, MaxSize>::block(dst, src);
44-
if (count <= 4)
45-
return generic::Memmove<2, MaxSize>::head_tail(dst, src, count);
46-
if (count <= 8)
47-
return generic::Memmove<4, MaxSize>::head_tail(dst, src, count);
48-
if (count <= 16)
49-
return generic::Memmove<8, MaxSize>::head_tail(dst, src, count);
50-
if (count <= 32)
51-
return generic::Memmove<16, MaxSize>::head_tail(dst, src, count);
52-
if (count <= 64)
53-
return generic::Memmove<32, MaxSize>::head_tail(dst, src, count);
54-
if (count <= 128)
55-
return generic::Memmove<64, MaxSize>::head_tail(dst, src, count);
56-
if (dst < src) {
57-
generic::Memmove<32, MaxSize>::template align_forward<Arg::Src>(dst, src,
58-
count);
59-
return generic::Memmove<64, MaxSize>::loop_and_tail_forward(dst, src,
60-
count);
61-
} else {
62-
generic::Memmove<32, MaxSize>::template align_backward<Arg::Src>(dst, src,
63-
count);
64-
return generic::Memmove<64, MaxSize>::loop_and_tail_backward(dst, src,
65-
count);
66-
}
67-
}
68-
6937
LIBC_INLINE void inline_memmove(Ptr dst, CPtr src, size_t count) {
7038
#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
7139
#if defined(LIBC_TARGET_ARCH_IS_X86)
72-
static constexpr size_t kMaxSize = x86::kAvx512F ? 64
73-
: x86::kAvx ? 32
74-
: x86::kSse2 ? 16
75-
: 8;
40+
#if defined(__AVX512F__)
41+
using uint128_t = uint8x16_t;
42+
using uint256_t = uint8x32_t;
43+
using uint512_t = uint8x64_t;
44+
#elif defined(__AVX__)
45+
using uint128_t = uint8x16_t;
46+
using uint256_t = uint8x32_t;
47+
using uint512_t = cpp::array<uint8x32_t, 2>;
48+
#elif defined(__SSE2__)
49+
using uint128_t = uint8x16_t;
50+
using uint256_t = cpp::array<uint8x16_t, 2>;
51+
using uint512_t = cpp::array<uint8x16_t, 4>;
52+
#else
53+
using uint128_t = cpp::array<uint64_t, 2>;
54+
using uint256_t = cpp::array<uint64_t, 4>;
55+
using uint512_t = cpp::array<uint64_t, 8>;
56+
#endif
7657
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
77-
static constexpr size_t kMaxSize = aarch64::kNeon ? 16 : 8;
58+
static_assert(aarch64::kNeon, "aarch64 supports vector types");
59+
using uint128_t = uint8x16_t;
60+
using uint256_t = uint8x32_t;
61+
using uint512_t = uint8x64_t;
7862
#endif
79-
// return inline_memmove_generic<kMaxSize>(dst, src, count);
8063
if (count == 0)
8164
return;
8265
if (count == 1)
83-
return generic::Memmove<1, kMaxSize>::block(dst, src);
66+
return generic::Memmove<uint8_t>::block(dst, src);
8467
if (count <= 4)
85-
return generic::Memmove<2, kMaxSize>::head_tail(dst, src, count);
68+
return generic::Memmove<uint16_t>::head_tail(dst, src, count);
8669
if (count <= 8)
87-
return generic::Memmove<4, kMaxSize>::head_tail(dst, src, count);
70+
return generic::Memmove<uint32_t>::head_tail(dst, src, count);
8871
if (count <= 16)
89-
return generic::Memmove<8, kMaxSize>::head_tail(dst, src, count);
72+
return generic::Memmove<uint64_t>::head_tail(dst, src, count);
9073
if (count <= 32)
91-
return generic::Memmove<16, kMaxSize>::head_tail(dst, src, count);
74+
return generic::Memmove<uint128_t>::head_tail(dst, src, count);
9275
if (count <= 64)
93-
return generic::Memmove<32, kMaxSize>::head_tail(dst, src, count);
76+
return generic::Memmove<uint256_t>::head_tail(dst, src, count);
9477
if (count <= 128)
95-
return generic::Memmove<64, kMaxSize>::head_tail(dst, src, count);
78+
return generic::Memmove<uint512_t>::head_tail(dst, src, count);
9679
if (dst < src) {
97-
generic::Memmove<32, kMaxSize>::align_forward<Arg::Src>(dst, src, count);
98-
return generic::Memmove<64, kMaxSize>::loop_and_tail_forward(dst, src,
99-
count);
80+
generic::Memmove<uint256_t>::align_forward<Arg::Src>(dst, src, count);
81+
return generic::Memmove<uint512_t>::loop_and_tail_forward(dst, src, count);
10082
} else {
101-
generic::Memmove<32, kMaxSize>::align_backward<Arg::Src>(dst, src, count);
102-
return generic::Memmove<64, kMaxSize>::loop_and_tail_backward(dst, src,
103-
count);
83+
generic::Memmove<uint256_t>::align_backward<Arg::Src>(dst, src, count);
84+
return generic::Memmove<uint512_t>::loop_and_tail_backward(dst, src, count);
10485
}
10586
#else
10687
return inline_memmove_embedded_tiny(dst, src, count);

libc/src/string/memory_utils/op_generic.h

Lines changed: 22 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -254,32 +254,22 @@ template <typename T, typename... TS> struct Memset {
254254
// Memmove
255255
///////////////////////////////////////////////////////////////////////////////
256256

257-
template <size_t Size, size_t MaxSize> struct Memmove {
258-
static_assert(is_power2(MaxSize));
259-
using T = details::getTypeFor<Size, MaxSize>;
260-
static constexpr size_t SIZE = Size;
257+
template <typename T> struct Memmove {
258+
static constexpr size_t SIZE = sum_sizeof<T>();
261259

262260
LIBC_INLINE static void block(Ptr dst, CPtr src) {
263-
if constexpr (details::is_void_v<T>) {
264-
deferred_static_assert("Unimplemented Size");
265-
} else {
266-
store<T>(dst, load<T>(src));
267-
}
261+
store<T>(dst, load<T>(src));
268262
}
269263

270264
LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
271-
const size_t offset = count - Size;
272-
if constexpr (details::is_void_v<T>) {
273-
deferred_static_assert("Unimplemented Size");
274-
} else {
275-
// The load and store operations can be performed in any order as long as
276-
// they are not interleaved. More investigations are needed to determine
277-
// the best order.
278-
const auto head = load<T>(src);
279-
const auto tail = load<T>(src + offset);
280-
store<T>(dst, head);
281-
store<T>(dst + offset, tail);
282-
}
265+
const size_t offset = count - SIZE;
266+
// The load and store operations can be performed in any order as long as
267+
// they are not interleaved. More investigations are needed to determine
268+
// the best order.
269+
const auto head = load<T>(src);
270+
const auto tail = load<T>(src + offset);
271+
store<T>(dst, head);
272+
store<T>(dst + offset, tail);
283273
}
284274

285275
// Align forward suitable when dst < src. The alignment is performed with
@@ -305,8 +295,8 @@ template <size_t Size, size_t MaxSize> struct Memmove {
305295
Ptr prev_dst = dst;
306296
CPtr prev_src = src;
307297
size_t prev_count = count;
308-
align_to_next_boundary<Size, AlignOn>(dst, src, count);
309-
adjust(Size, dst, src, count);
298+
align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
299+
adjust(SIZE, dst, src, count);
310300
head_tail(prev_dst, prev_src, prev_count - count);
311301
}
312302

@@ -333,9 +323,9 @@ template <size_t Size, size_t MaxSize> struct Memmove {
333323
Ptr headtail_dst = dst + count;
334324
CPtr headtail_src = src + count;
335325
size_t headtail_size = 0;
336-
align_to_next_boundary<Size, AlignOn>(headtail_dst, headtail_src,
326+
align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
337327
headtail_size);
338-
adjust(-2 * Size, headtail_dst, headtail_src, headtail_size);
328+
adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
339329
head_tail(headtail_dst, headtail_src, headtail_size);
340330
count -= headtail_size;
341331
}
@@ -356,15 +346,15 @@ template <size_t Size, size_t MaxSize> struct Memmove {
356346
// [_______________________SSSSSSSS_____]
357347
LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
358348
size_t count) {
359-
static_assert(Size > 1, "a loop of size 1 does not need tail");
360-
const size_t tail_offset = count - Size;
349+
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
350+
const size_t tail_offset = count - SIZE;
361351
const auto tail_value = load<T>(src + tail_offset);
362352
size_t offset = 0;
363353
LIBC_LOOP_NOUNROLL
364354
do {
365355
block(dst + offset, src + offset);
366-
offset += Size;
367-
} while (offset < count - Size);
356+
offset += SIZE;
357+
} while (offset < count - SIZE);
368358
store<T>(dst + tail_offset, tail_value);
369359
}
370360

@@ -384,13 +374,13 @@ template <size_t Size, size_t MaxSize> struct Memmove {
384374
// [_____SSSSSSSS_______________________]
385375
LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
386376
size_t count) {
387-
static_assert(Size > 1, "a loop of size 1 does not need tail");
377+
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
388378
const auto head_value = load<T>(src);
389-
ptrdiff_t offset = count - Size;
379+
ptrdiff_t offset = count - SIZE;
390380
LIBC_LOOP_NOUNROLL
391381
do {
392382
block(dst + offset, src + offset);
393-
offset -= Size;
383+
offset -= SIZE;
394384
} while (offset >= 0);
395385
store<T>(dst, head_value);
396386
}

0 commit comments

Comments
 (0)