From 2efb7f7b6ab34c97d412eeed1d915beae39317da Mon Sep 17 00:00:00 2001 From: scooletz Date: Mon, 12 May 2025 11:35:39 +0200 Subject: [PATCH 1/3] branch less Udivrem --- src/Nethermind.Int256/UInt256.cs | 47 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/Nethermind.Int256/UInt256.cs b/src/Nethermind.Int256/UInt256.cs index c3e25f2..f336358 100644 --- a/src/Nethermind.Int256/UInt256.cs +++ b/src/Nethermind.Int256/UInt256.cs @@ -667,27 +667,38 @@ internal static ulong Rsh(ulong a, int n) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void Udivrem(ref ulong quot, ref ulong u, int length, in UInt256 d, out UInt256 rem) { - int dLen = 0; - int shift = 0; - if (d.u3 != 0) - { - dLen = 4; - shift = LeadingZeros(d.u3); - } - else if (d.u2 != 0) - { - dLen = 3; - shift = LeadingZeros(d.u2); - } - else if (d.u1 != 0) + Unsafe.SkipInit(out int dLen); + Unsafe.SkipInit(out int shift); + + if (Vector256.IsHardwareAccelerated) { - dLen = 2; - shift = LeadingZeros(d.u1); + Vector256 v = Vector256.LoadUnsafe(in d.u0); + var isZero = Vector256.IsZero(v); + dLen = 32 - BitOperations.LeadingZeroCount(~isZero.ExtractMostSignificantBits() & 0b1111); + shift = LeadingZeros(Unsafe.Add(ref Unsafe.AsRef(in d.u0), dLen - 1)); } - else if (d.u0 != 0) + else { - dLen = 1; - shift = LeadingZeros(d.u0); + if (d.u3 != 0) + { + dLen = 4; + shift = LeadingZeros(d.u3); + } + else if (d.u2 != 0) + { + dLen = 3; + shift = LeadingZeros(d.u2); + } + else if (d.u1 != 0) + { + dLen = 2; + shift = LeadingZeros(d.u1); + } + else if (d.u0 != 0) + { + dLen = 1; + shift = LeadingZeros(d.u0); + } } int uLen = 0; From 0e906397b760b2c606d7a2cf67f7d2c7611ff2c5 Mon Sep 17 00:00:00 2001 From: scooletz Date: Mon, 12 May 2025 12:41:08 +0200 Subject: [PATCH 2/3] some comments --- src/Nethermind.Int256/UInt256.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Nethermind.Int256/UInt256.cs b/src/Nethermind.Int256/UInt256.cs index 
f336358..50f075a 100644 --- a/src/Nethermind.Int256/UInt256.cs +++ b/src/Nethermind.Int256/UInt256.cs @@ -672,8 +672,13 @@ private static void Udivrem(ref ulong quot, ref ulong u, int length, in UInt256 if (Vector256.IsHardwareAccelerated) { + // Use the fact that u0, u1, u2, u3 can be loaded as a vector Vector256 v = Vector256.LoadUnsafe(in d.u0); + + // Check which is zero var isZero = Vector256.IsZero(v); + + // Use most significant bits, negation and masking with 4 bits to find the most significant set dLen = 32 - BitOperations.LeadingZeroCount(~isZero.ExtractMostSignificantBits() & 0b1111); shift = LeadingZeros(Unsafe.Add(ref Unsafe.AsRef(in d.u0), dLen - 1)); } From 8b754ebccca05b52e7873cae018dec59e7102ce0 Mon Sep 17 00:00:00 2001 From: scooletz Date: Tue, 13 May 2025 09:53:16 +0200 Subject: [PATCH 3/3] readability --- src/Nethermind.Int256/UInt256.cs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Nethermind.Int256/UInt256.cs b/src/Nethermind.Int256/UInt256.cs index 50f075a..1b73d86 100644 --- a/src/Nethermind.Int256/UInt256.cs +++ b/src/Nethermind.Int256/UInt256.cs @@ -675,11 +675,15 @@ private static void Udivrem(ref ulong quot, ref ulong u, int length, in UInt256 // Use the fact that u0, u1, u2, u3 can be loaded as a vector Vector256 v = Vector256.LoadUnsafe(in d.u0); - // Check which is zero + // Check which ulongs are zero var isZero = Vector256.IsZero(v); - // Use most significant bits, negation and masking with 4 bits to find the most significant set - dLen = 32 - BitOperations.LeadingZeroCount(~isZero.ExtractMostSignificantBits() & 0b1111); + const int ulongCount = 4; + const uint mask = (1 << ulongCount) - 1; + + // Bit n of the extracted mask is 1 if the nth ulong is 0. Negate and mask with 4 bits to find the most significant non-zero ulong.
+ var nonZeroUlongBits = ~isZero.ExtractMostSignificantBits() & mask; + dLen = 32 - BitOperations.LeadingZeroCount(nonZeroUlongBits); shift = LeadingZeros(Unsafe.Add(ref Unsafe.AsRef(in d.u0), dLen - 1)); } else