diff --git a/src/Nethermind.Int256/UInt256.cs b/src/Nethermind.Int256/UInt256.cs index c3e25f2..1b73d86 100644 --- a/src/Nethermind.Int256/UInt256.cs +++ b/src/Nethermind.Int256/UInt256.cs @@ -667,27 +667,47 @@ internal static ulong Rsh(ulong a, int n) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void Udivrem(ref ulong quot, ref ulong u, int length, in UInt256 d, out UInt256 rem) { - int dLen = 0; - int shift = 0; - if (d.u3 != 0) - { - dLen = 4; - shift = LeadingZeros(d.u3); - } - else if (d.u2 != 0) - { - dLen = 3; - shift = LeadingZeros(d.u2); - } - else if (d.u1 != 0) + Unsafe.SkipInit(out int dLen); + Unsafe.SkipInit(out int shift); + + if (Vector256.IsHardwareAccelerated) { - dLen = 2; - shift = LeadingZeros(d.u1); + // Use the fact that u0, u1, u2, u3 can be loaded as a vector + Vector256 v = Vector256.LoadUnsafe(in d.u0); + + // Check which ulongs are zero + var isZero = Vector256.IsZero(v); + + const int ulongCount = 4; + const uint mask = (1 << ulongCount) - 1; + + // The nth most significant bit is 1 if a nth ulong is 0. Negate and mask with 4 bits to find the most significant set. + var nonZeroUlongBits = ~isZero.ExtractMostSignificantBits() & mask; + dLen = 32 - BitOperations.LeadingZeroCount(nonZeroUlongBits); + shift = LeadingZeros(Unsafe.Add(ref Unsafe.AsRef(in d.u0), dLen - 1)); } - else if (d.u0 != 0) + else { - dLen = 1; - shift = LeadingZeros(d.u0); + if (d.u3 != 0) + { + dLen = 4; + shift = LeadingZeros(d.u3); + } + else if (d.u2 != 0) + { + dLen = 3; + shift = LeadingZeros(d.u2); + } + else if (d.u1 != 0) + { + dLen = 2; + shift = LeadingZeros(d.u1); + } + else if (d.u0 != 0) + { + dLen = 1; + shift = LeadingZeros(d.u0); + } } int uLen = 0;