From f3f6b7902f1e72f28d9446f759c02ed0c00ba039 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Thu, 9 Jan 2025 19:45:54 +0200
Subject: [PATCH 01/42] init

---
 contracts/libs/crypto/ECDSA384.sol          | 920 ++++----------------
 contracts/libs/crypto/bn/U512.sol           | 575 ++++++++++++
 contracts/mock/libs/crypto/ECDSA384Mock.sol |   9 +-
 hardhat.config.ts                           |   2 +-
 test/libs/crypto/ECDSA384.test.ts           |   2 +-
 5 files changed, 774 insertions(+), 734 deletions(-)
 create mode 100644 contracts/libs/crypto/bn/U512.sol

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 9e43756d..12c6581d 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
+import {U512} from "./bn/U512.sol";
+import {uint512} from "./bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
-
+import "hardhat/console.sol";
 /**
  * @notice Cryptography module
  *
@@ -16,7 +18,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  */
 library ECDSA384 {
     using MemoryUtils for *;
-    using U384 for *;
+    using U512 for *;
 
     /**
      * @notice 384-bit curve parameters.
@@ -32,20 +34,20 @@ library ECDSA384 {
     }
 
     struct _Parameters {
-        uint256 a;
-        uint256 b;
-        uint256 gx;
-        uint256 gy;
-        uint256 p;
-        uint256 n;
-        uint256 lowSmax;
+        uint512 a;
+        uint512 b;
+        uint512 gx;
+        uint512 gy;
+        uint512 p;
+        uint512 n;
+        uint512 lowSmax;
     }
 
     struct _Inputs {
-        uint256 r;
-        uint256 s;
-        uint256 x;
-        uint256 y;
+        uint512 r;
+        uint512 s;
+        uint512 x;
+        uint512 y;
     }
 
     /**
@@ -67,8 +69,8 @@ library ECDSA384 {
         unchecked {
             _Inputs memory inputs_;
 
-            (inputs_.r, inputs_.s) = U384.init2(signature_);
-            (inputs_.x, inputs_.y) = U384.init2(pubKey_);
+            (inputs_.r, inputs_.s) = U512.init2(signature_);
+            (inputs_.x, inputs_.y) = U512.init2(pubKey_);
 
             _Parameters memory params_ = _Parameters({
                 a: curveParams_.a.init(),
@@ -80,14 +82,14 @@ library ECDSA384 {
                 lowSmax: curveParams_.lowSmax.init()
             });
 
-            uint256 call = U384.initCall(params_.p);
+            uint256 call = U512.initCall(params_.p);
 
             /// accept s only from the lower part of the curve
             if (
-                U384.eqInteger(inputs_.r, 0) ||
-                U384.cmp(inputs_.r, params_.n) >= 0 ||
-                U384.eqInteger(inputs_.s, 0) ||
-                U384.cmp(inputs_.s, params_.lowSmax) > 0
+                U512.eqInteger(inputs_.r, 0) ||
+                U512.cmp(inputs_.r, params_.n) >= 0 ||
+                U512.eqInteger(inputs_.s, 0) ||
+                U512.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
             }
@@ -113,14 +115,14 @@ library ECDSA384 {
                 }
             }
 
-            uint256 scalar1 = U384.moddiv(call, hashedMessage_.init(), inputs_.s, params_.n);
-            uint256 scalar2 = U384.moddiv(call, inputs_.r, inputs_.s, params_.n);
+            uint512 scalar1 = U512.moddiv(call, hashedMessage_.init(), inputs_.s, params_.n);
+            uint512 scalar2 = U512.moddiv(call, inputs_.r, inputs_.s, params_.n);
 
             {
-                uint256 three = U384.init(3);
+                uint512 three = U512.init(3);
 
                 /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
-                uint256[2][64] memory points_ = _precomputePointsTable(
+                uint512[2][64] memory points_ = _precomputePointsTable(
                     call,
                     params_.p,
                     three,
@@ -142,9 +144,9 @@ library ECDSA384 {
                 );
             }
 
-            U384.modAssign(call, scalar1, params_.n);
+            U512.modAssign(call, scalar1, params_.n);
 
-            return U384.eq(scalar1, inputs_.r);
+            return U512.eq(scalar1, inputs_.r);
         }
     }
 
@@ -153,29 +155,29 @@ library ECDSA384 {
      */
     function _isOnCurve(
         uint256 call,
-        uint256 p,
-        uint256 a,
-        uint256 b,
-        uint256 x,
-        uint256 y
+        uint512 p, // modulus handed to `modadd`; also compared against x and y below
+        uint512 a, // coefficient of x in the right-hand side
+        uint512 b, // constant term of the right-hand side
+        uint512 x, // candidate point, first coordinate
+        uint512 y // candidate point, second coordinate
     ) private view returns (bool) {
         unchecked {
-            if (U384.eqInteger(x, 0) || U384.eq(x, p) || U384.eqInteger(y, 0) || U384.eq(y, p)) {
+            if (U512.eqInteger(x, 0) || U512.eq(x, p) || U512.eqInteger(y, 0) || U512.eq(y, p)) { // coordinates equal to 0 or p are rejected outright
                 return false;
             }
 
-            uint256 LHS = U384.modexp(call, y, 2);
-            uint256 RHS = U384.modexp(call, x, 3);
+            uint512 LHS = U512.modexp(call, y, 2); // y^2 (modulus presumably carried inside `call` - see `initCall`)
+            uint512 RHS = U512.modexp(call, x, 3); // x^3
 
-            if (!U384.eqInteger(a, 0)) {
-                RHS = U384.modadd(RHS, U384.modmul(call, x, a), p); // x^3 + a*x
+            if (!U512.eqInteger(a, 0)) { // the a*x term is skipped entirely when a == 0
+                RHS = U512.modadd(RHS, U512.modmul(call, x, a), p); // x^3 + a*x
             }
 
-            if (!U384.eqInteger(b, 0)) {
-                RHS = U384.modadd(RHS, b, p); // x^3 + a*x + b
+            if (!U512.eqInteger(b, 0)) { // the constant term is skipped when b == 0
+                RHS = U512.modadd(RHS, b, p); // x^3 + a*x + b
             }
 
-            return U384.eq(LHS, RHS);
+            return U512.eq(LHS, RHS); // on the curve iff y^2 == x^3 + a*x + b
         }
     }
 
@@ -184,39 +186,38 @@ library ECDSA384 {
      */
     function _doubleScalarMultiplication(
         uint256 call,
-        uint256 p,
-        uint256 three,
-        uint256 a,
-        uint256[2][64] memory points,
-        uint256 scalar1,
-        uint256 scalar2
-    ) private view returns (uint256 x, uint256 y) {
+        uint512 p,
+        uint512 three,
+        uint512 a,
+        uint512[2][64] memory points,
+        uint512 scalar1,
+        uint512 scalar2
+    ) private view returns (uint512 x, uint512 y) {
         unchecked {
-            uint256 mask_;
-            uint256 scalar1Bits_;
-            uint256 scalar2Bits_;
-
-            assembly {
-                scalar1Bits_ := mload(scalar1)
-                scalar2Bits_ := mload(scalar2)
-            }
-
-            (x, y) = _twiceAffine(call, p, three, a, x, y);
-
-            mask_ = ((scalar1Bits_ >> 183) << 3) | (scalar2Bits_ >> 183);
-
-            if (mask_ != 0) {
-                (x, y) = _addAffine(call, p, three, a, points[mask_][0], points[mask_][1], x, y);
-            }
+            x = U512.init();
+            y = U512.init();
 
-            for (uint256 word = 4; word <= 184; word += 3) {
-                (x, y) = _twice3Affine(call, p, three, a, x, y);
-
-                mask_ =
-                    (((scalar1Bits_ >> (184 - word)) & 0x07) << 3) |
-                    ((scalar2Bits_ >> (184 - word)) & 0x07);
+            uint256 mask_;
+            uint256 mask1_;
+            uint256 mask2_;
+            // Simultaneous double-scalar multiplication over 3-bit windows, scanned from
+            // the most significant window (bits 383..381) down to the last one (bits 2..0).
+            // Each iteration first multiplies the accumulator (x, y) by 8 via three
+            // doublings and then adds the table entry selected by the 6-bit mask, whose
+            // upper 3 bits come from `scalar1` and lower 3 bits from `scalar2`.
+            // The doublings must run for EVERY window, including those where both scalars
+            // contribute only zero bits; skipping them would give later windows a wrong weight.
+            // A null handler stands for "no point yet": `_twice3Affine` returns null for a
+            // null input, so the leading doublings of the empty accumulator change nothing.
+
+            for (uint256 bit = 3; bit <= 384; bit += 3) {
+                (x, y) = _twice3Affine(call, p, three, a, x, y);
+
+                mask1_ = _getWord(scalar1, 384 - bit);
+                mask2_ = _getWord(scalar2, 384 - bit);
+                mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
                     (x, y) = _addAffine(
                         call,
                         p,
@@ -229,40 +230,25 @@ library ECDSA384 {
                     );
                 }
             }
+        }
+    }
 
-            assembly {
-                scalar1Bits_ := mload(add(scalar1, 0x20))
-                scalar2Bits_ := mload(add(scalar2, 0x20))
-            }
-
-            (x, y) = _twiceAffine(call, p, three, a, x, y);
-
-            mask_ = ((scalar1Bits_ >> 255) << 3) | (scalar2Bits_ >> 255);
+    function _getWord(uint512 scalar_, uint256 bit_) private pure returns (uint256) { // returns the 3 bits of the value behind `scalar_` starting at bit index `bit_`
+        unchecked {
+            uint256 word_;
+            if (bit_ <= 253) { // bits bit_..bit_+2 all fit inside the second 32-byte word
+                assembly {
+                    word_ := mload(add(scalar_, 0x20)) // second 32-byte word of the handler
+                }
 
-            if (mask_ != 0) {
-                (x, y) = _addAffine(call, p, three, a, points[mask_][0], points[mask_][1], x, y);
+                return (word_ >> bit_) & 0x07; // keep only the 3 requested bits
             }
 
-            for (uint256 word = 4; word <= 256; word += 3) {
-                (x, y) = _twice3Affine(call, p, three, a, x, y);
-
-                mask_ =
-                    (((scalar1Bits_ >> (256 - word)) & 0x07) << 3) |
-                    ((scalar2Bits_ >> (256 - word)) & 0x07);
-
-                if (mask_ != 0) {
-                    (x, y) = _addAffine(
-                        call,
-                        p,
-                        three,
-                        a,
-                        points[mask_][0],
-                        points[mask_][1],
-                        x,
-                        y
-                    );
-                }
+            assembly {
+                word_ := mload(add(scalar_, 0x10)) // 32 bytes read 16 bytes into the handler: low half of word 0 + high half of word 1
             }
+
+            return (word_ >> (bit_ - 128)) & 0x07; // the straddling read starts 128 bits higher (assuming word 0 is the most significant), hence the re-base
         }
     }
 
@@ -271,35 +257,38 @@ library ECDSA384 {
      */
     function _twiceAffine(
         uint256 call,
-        uint256 p,
-        uint256 three,
-        uint256 a,
-        uint256 x1,
-        uint256 y1
-    ) private view returns (uint256 x2, uint256 y2) {
+        uint512 p,
+        uint512 three, // handler expected to hold the integer 3 (the caller builds it with `U512.init(3)`)
+        uint512 a,
+        uint512 x1,
+        uint512 y1
+    ) private view returns (uint512 x2, uint512 y2) {
         unchecked {
-            if (x1 == 0) {
-                return (0, 0);
+            x2 = U512.init(); // null handler: the result for every early return below
+            y2 = U512.init();
+
+            if (x1.isNull()) { // a null input handler yields a null result
+                return (U512.init(), U512.init());
             }
 
-            if (U384.eqInteger(y1, 0)) {
-                return (0, 0);
+            if (U512.eqInteger(y1, 0)) { // y1 == 0 would put a zero denominator (2*y1) into the division below
+                return (U512.init(), U512.init());
             }
 
-            uint256 m1 = U384.modexp(call, x1, 2);
-            U384.modmulAssign(call, m1, three);
-            U384.modaddAssign(m1, a, p);
+            uint512 m1 = U512.modexp(call, x1, 2); // m1 = x1^2
+            U512.modmulAssign(call, m1, three); // m1 = 3*x1^2
+            U512.modaddAssign(m1, a, p); // m1 = 3*x1^2 + a
 
-            uint256 m2 = U384.modshl1(y1, p);
-            U384.moddivAssign(call, m1, m2);
+            uint512 m2 = U512.modshl1(y1, p); // m2 = 2*y1
+            U512.moddivAssign(call, m1, m2); // m1 = (3*x1^2 + a) / (2*y1); presumably m2 is overwritten too, as in the removed U384 helper
 
-            x2 = U384.modexp(call, m1, 2);
-            U384.modsubAssign(x2, x1, p);
-            U384.modsubAssign(x2, x1, p);
+            x2 = U512.modexp(call, m1, 2); // x2 = m1^2
+            U512.modsubAssign(x2, x1, p); // x2 = m1^2 - x1
+            U512.modsubAssign(x2, x1, p); // x2 = m1^2 - 2*x1
 
-            y2 = U384.modsub(x1, x2, p);
-            U384.modmulAssign(call, y2, m1);
-            U384.modsubAssign(y2, y1, p);
+            y2 = U512.modsub(x1, x2, p); // y2 = x1 - x2
+            U512.modmulAssign(call, y2, m1); // y2 = m1*(x1 - x2)
+            U512.modsubAssign(y2, y1, p); // y2 = m1*(x1 - x2) - y1
         }
     }
 
@@ -308,73 +297,76 @@ library ECDSA384 {
      */
     function _twice3Affine(
         uint256 call,
-        uint256 p,
-        uint256 three,
-        uint256 a,
-        uint256 x1,
-        uint256 y1
-    ) private view returns (uint256 x2, uint256 y2) {
+        uint512 p,
+        uint512 three, // handler expected to hold the integer 3 (the caller builds it with `U512.init(3)`)
+        uint512 a,
+        uint512 x1, // NOTE(review): used as the destination of the second doubling below, so the caller's value is presumably overwritten
+        uint512 y1 // NOTE(review): same as x1 - used as a destination by the second doubling
+    ) private view returns (uint512 x2, uint512 y2) {
         unchecked {
-            if (x1 == 0) {
-                return (0, 0);
+            x2 = U512.init(); // null handler: the result for every early return below
+            y2 = U512.init();
+
+            if (x1.isNull()) { // a null input handler yields a null result
+                return (U512.init(), U512.init());
             }
 
-            if (U384.eqInteger(y1, 0)) {
-                return (0, 0);
+            if (U512.eqInteger(y1, 0)) { // first doubling would divide by 2*y1 == 0
+                return (U512.init(), U512.init());
             }
 
-            uint256 m1 = U384.modexp(call, x1, 2);
-            U384.modmulAssign(call, m1, three);
-            U384.modaddAssign(m1, a, p);
+            uint512 m1 = U512.modexp(call, x1, 2); // doubling #1: m1 = x1^2
+            U512.modmulAssign(call, m1, three); // m1 = 3*x1^2
+            U512.modaddAssign(m1, a, p); // m1 = 3*x1^2 + a
 
-            uint256 m2 = U384.modshl1(y1, p);
-            U384.moddivAssign(call, m1, m2);
+            uint512 m2 = U512.modshl1(y1, p); // m2 = 2*y1
+            U512.moddivAssign(call, m1, m2); // m1 = (3*x1^2 + a) / (2*y1)
 
-            x2 = U384.modexp(call, m1, 2);
-            U384.modsubAssign(x2, x1, p);
-            U384.modsubAssign(x2, x1, p);
+            x2 = U512.modexp(call, m1, 2); // x2 = m1^2
+            U512.modsubAssign(x2, x1, p); // x2 = m1^2 - x1
+            U512.modsubAssign(x2, x1, p); // x2 = m1^2 - 2*x1
 
-            y2 = U384.modsub(x1, x2, p);
-            U384.modmulAssign(call, y2, m1);
-            U384.modsubAssign(y2, y1, p);
+            y2 = U512.modsub(x1, x2, p); // y2 = x1 - x2
+            U512.modmulAssign(call, y2, m1); // y2 = m1*(x1 - x2)
+            U512.modsubAssign(y2, y1, p); // y2 = m1*(x1 - x2) - y1; (x2, y2) is the first doubling
 
-            if (U384.eqInteger(y2, 0)) {
-                return (0, 0);
+            if (U512.eqInteger(y2, 0)) { // second doubling would divide by 2*y2 == 0
+                return (U512.init(), U512.init());
             }
 
-            U384.modexpAssignTo(call, m1, x2, 2);
-            U384.modmulAssign(call, m1, three);
-            U384.modaddAssign(m1, a, p);
+            U512.modexpAssignTo(call, m1, x2, 2); // doubling #2, reusing m1/m2: m1 = x2^2
+            U512.modmulAssign(call, m1, three); // m1 = 3*x2^2
+            U512.modaddAssign(m1, a, p); // m1 = 3*x2^2 + a
 
-            U384.modshl1AssignTo(m2, y2, p);
-            U384.moddivAssign(call, m1, m2);
+            U512.modshl1AssignTo(m2, y2, p); // m2 = 2*y2
+            U512.moddivAssign(call, m1, m2); // m1 = (3*x2^2 + a) / (2*y2)
 
-            U384.modexpAssignTo(call, x1, m1, 2);
-            U384.modsubAssign(x1, x2, p);
-            U384.modsubAssign(x1, x2, p);
+            U512.modexpAssignTo(call, x1, m1, 2); // result goes into the x1 handler: x1 = m1^2
+            U512.modsubAssign(x1, x2, p); // x1 = m1^2 - x2
+            U512.modsubAssign(x1, x2, p); // x1 = m1^2 - 2*x2
 
-            U384.modsubAssignTo(y1, x2, x1, p);
-            U384.modmulAssign(call, y1, m1);
-            U384.modsubAssign(y1, y2, p);
+            U512.modsubAssignTo(y1, x2, x1, p); // result goes into the y1 handler: y1 = x2 - x1
+            U512.modmulAssign(call, y1, m1); // y1 = m1*(x2 - x1)
+            U512.modsubAssign(y1, y2, p); // y1 = m1*(x2 - x1) - y2; (x1, y1) is now the second doubling
 
-            if (U384.eqInteger(y1, 0)) {
-                return (0, 0);
+            if (U512.eqInteger(y1, 0)) { // third doubling would divide by 2*y1 == 0
+                return (U512.init(), U512.init());
             }
 
-            U384.modexpAssignTo(call, m1, x1, 2);
-            U384.modmulAssign(call, m1, three);
-            U384.modaddAssign(m1, a, p);
+            U512.modexpAssignTo(call, m1, x1, 2); // doubling #3: m1 = x1^2
+            U512.modmulAssign(call, m1, three); // m1 = 3*x1^2
+            U512.modaddAssign(m1, a, p); // m1 = 3*x1^2 + a
 
-            U384.modshl1AssignTo(m2, y1, p);
-            U384.moddivAssign(call, m1, m2);
+            U512.modshl1AssignTo(m2, y1, p); // m2 = 2*y1
+            U512.moddivAssign(call, m1, m2); // m1 = (3*x1^2 + a) / (2*y1)
 
-            U384.modexpAssignTo(call, x2, m1, 2);
-            U384.modsubAssign(x2, x1, p);
-            U384.modsubAssign(x2, x1, p);
+            U512.modexpAssignTo(call, x2, m1, 2); // final result is written back into x2: x2 = m1^2
+            U512.modsubAssign(x2, x1, p); // x2 = m1^2 - x1
+            U512.modsubAssign(x2, x1, p); // x2 = m1^2 - 2*x1
 
-            U384.modsubAssignTo(y2, x1, x2, p);
-            U384.modmulAssign(call, y2, m1);
-            U384.modsubAssign(y2, y1, p);
+            U512.modsubAssignTo(y2, x1, x2, p); // y2 = x1 - x2
+            U512.modmulAssign(call, y2, m1); // y2 = m1*(x1 - x2)
+            U512.modsubAssign(y2, y1, p); // y2 = m1*(x1 - x2) - y1; (x2, y2) is the third doubling
         }
     }
 
@@ -383,56 +375,59 @@ library ECDSA384 {
      */
     function _addAffine(
         uint256 call,
-        uint256 p,
-        uint256 three,
-        uint256 a,
-        uint256 x1,
-        uint256 y1,
-        uint256 x2,
-        uint256 y2
-    ) private view returns (uint256 x3, uint256 y3) {
+        uint512 p,
+        uint512 three, // only forwarded to `_twiceAffine`
+        uint512 a, // only forwarded to `_twiceAffine`
+        uint512 x1,
+        uint512 y1,
+        uint512 x2,
+        uint512 y2
+    ) private view returns (uint512 x3, uint512 y3) {
         unchecked {
-            if (x1 == 0 || x2 == 0) {
-                if (x1 == 0 && x2 == 0) {
-                    return (0, 0);
+            x3 = U512.init(); // null handler until a real result is computed
+            y3 = U512.init();
+
+            if (x1.isNull() || x2.isNull()) { // at least one operand is the null point
+                if (x1.isNull() && x2.isNull()) { // null + null = null
+                    return (U512.init(), U512.init());
                 }
 
-                return x1 == 0 ? (x2.copy(), y2.copy()) : (x1.copy(), y1.copy());
+                return x1.isNull() ? (x2.copy(), y2.copy()) : (x1.copy(), y1.copy()); // null + P = P; returned as copies rather than the input handlers
             }
 
-            if (U384.eq(x1, x2)) {
-                if (U384.eq(y1, y2)) {
+            if (U512.eq(x1, x2)) { // same x: either the same point or (x, y) and (x, y') with y != y'
+                if (U512.eq(y1, y2)) { // identical points: fall back to doubling
                     return _twiceAffine(call, p, three, a, x1, y1);
                 }
 
-                return (0, 0);
+                return (U512.init(), U512.init()); // same x, different y: the sum is the null point
             }
 
-            uint256 m1 = U384.modsub(y1, y2, p);
-            uint256 m2 = U384.modsub(x1, x2, p);
+            uint512 m1 = U512.modsub(y1, y2, p); // m1 = y1 - y2
+            uint512 m2 = U512.modsub(x1, x2, p); // m2 = x1 - x2 (non-zero here, since x1 != x2)
 
-            U384.moddivAssign(call, m1, m2);
+            U512.moddivAssign(call, m1, m2); // m1 = (y1 - y2) / (x1 - x2)
 
-            x3 = U384.modexp(call, m1, 2);
-            U384.modsubAssign(x3, x1, p);
-            U384.modsubAssign(x3, x2, p);
+            x3 = U512.modexp(call, m1, 2); // x3 = m1^2
+            U512.modsubAssign(x3, x1, p); // x3 = m1^2 - x1
+            U512.modsubAssign(x3, x2, p); // x3 = m1^2 - x1 - x2
 
-            y3 = U384.modsub(x1, x3, p);
-            U384.modmulAssign(call, y3, m1);
-            U384.modsubAssign(y3, y1, p);
+            y3 = U512.modsub(x1, x3, p); // y3 = x1 - x3
+            U512.modmulAssign(call, y3, m1); // y3 = m1*(x1 - x3)
+            U512.modsubAssign(y3, y1, p); // y3 = m1*(x1 - x3) - y1
         }
     }
 
     function _precomputePointsTable(
         uint256 call,
-        uint256 p,
-        uint256 three,
-        uint256 a,
-        uint256 gx,
-        uint256 gy,
-        uint256 hx,
-        uint256 hy
-    ) private view returns (uint256[2][64] memory points_) {
+        uint512 p,
+        uint512 three,
+        uint512 a,
+        uint512 gx,
+        uint512 gy,
+        uint512 hx,
+        uint512 hy
+    ) private view returns (uint512[2][64] memory points_) {
         unchecked {
             (points_[0x01][0], points_[0x01][1]) = (hx.copy(), hy.copy());
             (points_[0x08][0], points_[0x08][1]) = (gx.copy(), gy.copy());
@@ -477,534 +472,3 @@ library ECDSA384 {
         }
     }
 }
-
-/**
- * @notice Low-level utility library that implements unsigned 384-bit arithmetics.
- *
- * Should not be used outside of this file.
- */
-library U384 {
-    uint256 private constant SHORT_ALLOCATION = 64;
-
-    uint256 private constant CALL_ALLOCATION = 4 * 288;
-
-    uint256 private constant MUL_OFFSET = 288;
-    uint256 private constant EXP_OFFSET = 2 * 288;
-    uint256 private constant INV_OFFSET = 3 * 288;
-
-    function init(uint256 from_) internal pure returns (uint256 handler_) {
-        unchecked {
-            handler_ = _allocate(SHORT_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0x00)
-                mstore(add(0x20, handler_), from_)
-            }
-
-            return handler_;
-        }
-    }
-
-    function init(bytes memory from_) internal pure returns (uint256 handler_) {
-        unchecked {
-            require(from_.length == 48, "U384: not 384");
-
-            handler_ = _allocate(SHORT_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0x00)
-                mstore(add(handler_, 0x10), mload(add(from_, 0x20)))
-                mstore(add(handler_, 0x20), mload(add(from_, 0x30)))
-            }
-
-            return handler_;
-        }
-    }
-
-    function init2(
-        bytes memory from2_
-    ) internal pure returns (uint256 handler1_, uint256 handler2_) {
-        unchecked {
-            require(from2_.length == 96, "U384: not 768");
-
-            handler1_ = _allocate(SHORT_ALLOCATION);
-            handler2_ = _allocate(SHORT_ALLOCATION);
-
-            assembly {
-                mstore(handler1_, 0x00)
-                mstore(add(handler1_, 0x10), mload(add(from2_, 0x20)))
-                mstore(add(handler1_, 0x20), mload(add(from2_, 0x30)))
-
-                mstore(handler2_, 0x00)
-                mstore(add(handler2_, 0x10), mload(add(from2_, 0x50)))
-                mstore(add(handler2_, 0x20), mload(add(from2_, 0x60)))
-            }
-
-            return (handler1_, handler2_);
-        }
-    }
-
-    function initCall(uint256 m_) internal pure returns (uint256 handler_) {
-        unchecked {
-            handler_ = _allocate(CALL_ALLOCATION);
-
-            _sub(m_, init(2), handler_ + INV_OFFSET + 0xA0);
-
-            assembly {
-                let call_ := add(handler_, MUL_OFFSET)
-
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, EXP_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), mload(m_))
-                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, INV_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-            }
-        }
-    }
-
-    function copy(uint256 handler_) internal pure returns (uint256 handlerCopy_) {
-        unchecked {
-            handlerCopy_ = _allocate(SHORT_ALLOCATION);
-
-            assembly {
-                mstore(handlerCopy_, mload(handler_))
-                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20)))
-            }
-
-            return handlerCopy_;
-        }
-    }
-
-    function eq(uint256 a_, uint256 b_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
-        }
-    }
-
-    function eqInteger(uint256 a_, uint256 bInteger_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_))
-        }
-    }
-
-    function cmp(uint256 a_, uint256 b_) internal pure returns (int256 cmp_) {
-        unchecked {
-            uint256 aWord_;
-            uint256 bWord_;
-
-            assembly {
-                aWord_ := mload(a_)
-                bWord_ := mload(b_)
-            }
-
-            if (aWord_ > bWord_) {
-                return 1;
-            }
-
-            if (aWord_ < bWord_) {
-                return -1;
-            }
-
-            assembly {
-                aWord_ := mload(add(a_, 0x20))
-                bWord_ := mload(add(b_, 0x20))
-            }
-
-            if (aWord_ > bWord_) {
-                return 1;
-            }
-
-            if (aWord_ < bWord_) {
-                return -1;
-            }
-        }
-    }
-
-    function modAssign(uint256 call_, uint256 a_, uint256 m_) internal view {
-        assembly {
-            mstore(call_, 0x40)
-            mstore(add(0x20, call_), 0x20)
-            mstore(add(0x40, call_), 0x40)
-            mstore(add(0x60, call_), mload(a_))
-            mstore(add(0x80, call_), mload(add(a_, 0x20)))
-            mstore(add(0xA0, call_), 0x01)
-            mstore(add(0xC0, call_), mload(m_))
-            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40))
-        }
-    }
-
-    function modexp(
-        uint256 call_,
-        uint256 b_,
-        uint256 eInteger_
-    ) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
-
-            assembly {
-                call_ := add(call_, EXP_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xA0, call_), eInteger_)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
-            }
-
-            return r_;
-        }
-    }
-
-    function modexpAssignTo(
-        uint256 call_,
-        uint256 to_,
-        uint256 b_,
-        uint256 eInteger_
-    ) internal view {
-        assembly {
-            call_ := add(call_, EXP_OFFSET)
-
-            mstore(add(0x60, call_), mload(b_))
-            mstore(add(0x80, call_), mload(add(b_, 0x20)))
-            mstore(add(0xA0, call_), eInteger_)
-
-            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40))
-        }
-    }
-
-    function modadd(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
-
-            _add(a_, b_, r_);
-
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
-
-            return r_;
-        }
-    }
-
-    function modaddAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            _addTo(a_, b_);
-
-            if (cmp(a_, m_) >= 0) {
-                return _subFrom(a_, m_);
-            }
-        }
-    }
-
-    function modmul(uint256 call_, uint256 a_, uint256 b_) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
-
-            _mul(a_, b_, call_ + MUL_OFFSET + 0x60);
-
-            assembly {
-                call_ := add(call_, MUL_OFFSET)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-
-            return r_;
-        }
-    }
-
-    function modmulAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
-        unchecked {
-            _mul(a_, b_, call_ + MUL_OFFSET + 0x60);
-
-            assembly {
-                call_ := add(call_, MUL_OFFSET)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40))
-            }
-        }
-    }
-
-    function modsub(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
-
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, r_);
-                return r_;
-            }
-
-            _add(a_, m_, r_);
-            _subFrom(r_, b_);
-        }
-    }
-
-    function modsubAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _subFrom(a_, b_);
-                return;
-            }
-
-            _addTo(a_, m_);
-            _subFrom(a_, b_);
-        }
-    }
-
-    function modsubAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, to_);
-                return;
-            }
-
-            _add(a_, m_, to_);
-            _subFrom(to_, b_);
-        }
-    }
-
-    function modshl1(uint256 a_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
-
-            _shl1(a_, r_);
-
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
-
-            return r_;
-        }
-    }
-
-    function modshl1AssignTo(uint256 to_, uint256 a_, uint256 m_) internal pure {
-        unchecked {
-            _shl1(a_, to_);
-
-            if (cmp(to_, m_) >= 0) {
-                _subFrom(to_, m_);
-            }
-        }
-    }
-
-    /// @dev Stores modinv into `b_` and moddiv into `a_`.
-    function moddivAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
-        unchecked {
-            assembly {
-                call_ := add(call_, INV_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40))
-            }
-
-            modmulAssign(call_ - INV_OFFSET, a_, b_);
-        }
-    }
-
-    function moddiv(
-        uint256 call_,
-        uint256 a_,
-        uint256 b_,
-        uint256 m_
-    ) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = modinv(call_, b_, m_);
-
-            _mul(a_, r_, call_ + 0x60);
-
-            assembly {
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-        }
-    }
-
-    function modinv(uint256 call_, uint256 b_, uint256 m_) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
-
-            _sub(m_, init(2), call_ + 0xA0);
-
-            assembly {
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-        }
-    }
-
-    function _shl1(uint256 a_, uint256 r_) internal pure {
-        assembly {
-            let a1_ := mload(add(a_, 0x20))
-
-            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
-            mstore(add(r_, 0x20), shl(1, a1_))
-        }
-    }
-
-    function _add(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(r_, 0x20), sum_)
-
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
-
-            mstore(r_, sum_)
-        }
-    }
-
-    function _sub(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(r_, 0x20), diff_)
-
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
-
-            mstore(r_, diff_)
-        }
-    }
-
-    function _subFrom(uint256 a_, uint256 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(a_, 0x20), diff_)
-
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
-
-            mstore(a_, diff_)
-        }
-    }
-
-    function _addTo(uint256 a_, uint256 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(a_, 0x20), sum_)
-
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
-
-            mstore(a_, sum_)
-        }
-    }
-
-    function _mul(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let a0_ := mload(a_)
-            let a1_ := shr(128, mload(add(a_, 0x20)))
-            let a2_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
-            let b0_ := mload(b_)
-            let b1_ := shr(128, mload(add(b_, 0x20)))
-            let b2_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
-            // r5
-            let current_ := mul(a2_, b2_)
-            let r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-            // r4
-            current_ := shr(128, current_)
-
-            let temp_ := mul(a1_, b2_)
-            current_ := add(current_, temp_)
-            let curry_ := lt(current_, temp_)
-
-            temp_ := mul(a2_, b1_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
-
-            mstore(add(r_, 0x40), add(shl(128, current_), r0_))
-
-            // r3
-            current_ := add(shl(128, curry_), shr(128, current_))
-            curry_ := 0
-
-            temp_ := mul(a0_, b2_)
-            current_ := add(current_, temp_)
-            curry_ := lt(current_, temp_)
-
-            temp_ := mul(a1_, b1_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
-
-            temp_ := mul(a2_, b0_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
-
-            r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-            // r2
-            current_ := add(shl(128, curry_), shr(128, current_))
-            curry_ := 0
-
-            temp_ := mul(a0_, b1_)
-            current_ := add(current_, temp_)
-            curry_ := lt(current_, temp_)
-
-            temp_ := mul(a1_, b0_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
-
-            mstore(add(r_, 0x20), add(shl(128, current_), r0_))
-
-            // r1
-            current_ := add(shl(128, curry_), shr(128, current_))
-            current_ := add(current_, mul(a0_, b0_))
-
-            mstore(r_, current_)
-        }
-    }
-
-    function _allocate(uint256 bytes_) private pure returns (uint256 handler_) {
-        unchecked {
-            assembly {
-                handler_ := mload(0x40)
-                mstore(0x40, add(handler_, bytes_))
-            }
-
-            return handler_;
-        }
-    }
-}
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
new file mode 100644
index 00000000..260aef0b
--- /dev/null
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+type uint512 is uint256;
+
+/**
+ * @notice Low-level utility library that implements unsigned 512-bit arithmetics.
+ */
+library U512 {
+    uint256 private constant SHORT_ALLOCATION = 64;
+    uint256 private constant LONG_ALLOCATION = 64;
+
+    uint256 private constant CALL_ALLOCATION = 4 * 288;
+
+    uint256 private constant MUL_OFFSET = 288;
+    uint256 private constant EXP_OFFSET = 2 * 288;
+    uint256 private constant INV_OFFSET = 3 * 288;
+
+    function toBytes(uint512 from_) internal pure returns (bytes memory bytes_) {
+        unchecked {
+            uint512 handler_ = _allocate(LONG_ALLOCATION);
+
+            assembly {
+                mstore(handler_, 0x40)
+                mstore(add(handler_, 0x20), mload(from_))
+                mstore(add(handler_, 0x40), mload(add(from_, 0x20)))
+
+                bytes_ := handler_
+            }
+        }
+    }
+
+    function isNull(uint512 handler) internal pure returns (bool isNull_) {
+        unchecked {
+            assembly {
+                isNull_ := iszero(handler)
+            }
+        }
+    }
+
+    function init() internal pure returns (uint512 handler_) {
+        unchecked {
+            assembly {
+                handler_ := 0
+            }
+        }
+    }
+
+    function init(uint256 from_) internal pure returns (uint512 handler_) {
+        unchecked {
+            handler_ = _allocate(SHORT_ALLOCATION);
+
+            assembly {
+                mstore(handler_, 0x00)
+                mstore(add(0x20, handler_), from_)
+            }
+
+            return handler_;
+        }
+    }
+
+    function init(bytes memory from_) internal pure returns (uint512 handler_) {
+        unchecked {
+            require(from_.length == 48, "U384: not 384");
+
+            handler_ = _allocate(SHORT_ALLOCATION);
+
+            assembly {
+                mstore(handler_, 0x00)
+                mstore(add(handler_, 0x10), mload(add(from_, 0x20)))
+                mstore(add(handler_, 0x20), mload(add(from_, 0x30)))
+            }
+
+            return handler_;
+        }
+    }
+
+    function init2(
+        bytes memory from2_
+    ) internal pure returns (uint512 handler1_, uint512 handler2_) {
+        unchecked {
+            require(from2_.length == 96, "U384: not 768");
+
+            handler1_ = _allocate(SHORT_ALLOCATION);
+            handler2_ = _allocate(SHORT_ALLOCATION);
+
+            assembly {
+                mstore(handler1_, 0x00)
+                mstore(add(handler1_, 0x10), mload(add(from2_, 0x20)))
+                mstore(add(handler1_, 0x20), mload(add(from2_, 0x30)))
+
+                mstore(handler2_, 0x00)
+                mstore(add(handler2_, 0x10), mload(add(from2_, 0x50)))
+                mstore(add(handler2_, 0x20), mload(add(from2_, 0x60)))
+            }
+
+            return (handler1_, handler2_);
+        }
+    }
+
+    function initCall(uint512 m_) internal pure returns (uint256 handler_) {
+        unchecked {
+            handler_ = _allocateCall(CALL_ALLOCATION);
+
+            _sub(m_, init(2), uint512.wrap(handler_ + INV_OFFSET + 0xA0));
+
+            assembly {
+                let call_ := add(handler_, MUL_OFFSET)
+
+                mstore(call_, 0x60)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xC0, call_), 0x01)
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                call_ := add(handler_, EXP_OFFSET)
+
+                mstore(call_, 0x40)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xC0, call_), mload(m_))
+                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+
+                call_ := add(handler_, INV_OFFSET)
+
+                mstore(call_, 0x40)
+                mstore(add(0x20, call_), 0x40)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+            }
+        }
+    }
+
+    function copy(uint512 handler_) internal pure returns (uint512 handlerCopy_) {
+        unchecked {
+            handlerCopy_ = _allocate(SHORT_ALLOCATION);
+
+            assembly {
+                mstore(handlerCopy_, mload(handler_))
+                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20)))
+            }
+
+            return handlerCopy_;
+        }
+    }
+
+    function eq(uint512 a_, uint512 b_) internal pure returns (bool eq_) {
+        assembly {
+            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
+        }
+    }
+
+    function eqInteger(uint512 a_, uint256 bInteger_) internal pure returns (bool eq_) {
+        assembly {
+            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_))
+        }
+    }
+
+    function cmp(uint512 a_, uint512 b_) internal pure returns (int256 cmp_) {
+        unchecked {
+            uint256 aWord_;
+            uint256 bWord_;
+
+            assembly {
+                aWord_ := mload(a_)
+                bWord_ := mload(b_)
+            }
+
+            if (aWord_ > bWord_) {
+                return 1;
+            }
+
+            if (aWord_ < bWord_) {
+                return -1;
+            }
+
+            assembly {
+                aWord_ := mload(add(a_, 0x20))
+                bWord_ := mload(add(b_, 0x20))
+            }
+
+            if (aWord_ > bWord_) {
+                return 1;
+            }
+
+            if (aWord_ < bWord_) {
+                return -1;
+            }
+        }
+    }
+
+    function modAssign(uint256 call_, uint512 a_, uint512 m_) internal view {
+        assembly {
+            mstore(call_, 0x40)
+            mstore(add(0x20, call_), 0x20)
+            mstore(add(0x40, call_), 0x40)
+            mstore(add(0x60, call_), mload(a_))
+            mstore(add(0x80, call_), mload(add(a_, 0x20)))
+            mstore(add(0xA0, call_), 0x01)
+            mstore(add(0xC0, call_), mload(m_))
+            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+
+            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40))
+        }
+    }
+
+    function modexp(
+        uint256 call_,
+        uint512 b_,
+        uint256 eInteger_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = _allocate(SHORT_ALLOCATION);
+
+            assembly {
+                call_ := add(call_, EXP_OFFSET)
+
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0xA0, call_), eInteger_)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
+            }
+
+            return r_;
+        }
+    }
+
+    function modexpAssignTo(
+        uint256 call_,
+        uint512 to_,
+        uint512 b_,
+        uint256 eInteger_
+    ) internal view {
+        assembly {
+            call_ := add(call_, EXP_OFFSET)
+
+            mstore(add(0x60, call_), mload(b_))
+            mstore(add(0x80, call_), mload(add(b_, 0x20)))
+            mstore(add(0xA0, call_), eInteger_)
+
+            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40))
+        }
+    }
+
+    function modadd(uint512 a_, uint512 b_, uint512 m_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = _allocate(SHORT_ALLOCATION);
+
+            _add(a_, b_, r_);
+
+            if (cmp(r_, m_) >= 0) {
+                _subFrom(r_, m_);
+            }
+
+            return r_;
+        }
+    }
+
+    function modaddAssign(uint512 a_, uint512 b_, uint512 m_) internal pure {
+        unchecked {
+            _addTo(a_, b_);
+
+            if (cmp(a_, m_) >= 0) {
+                return _subFrom(a_, m_);
+            }
+        }
+    }
+
+    function modmul(uint256 call_, uint512 a_, uint512 b_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = _allocate(SHORT_ALLOCATION);
+
+            _mul(a_, b_, uint512.wrap(call_ + MUL_OFFSET + 0x60));
+
+            assembly {
+                call_ := add(call_, MUL_OFFSET)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
+
+            return r_;
+        }
+    }
+
+    function modmulAssign(uint256 call_, uint512 a_, uint512 b_) internal view {
+        unchecked {
+            _mul(a_, b_, uint512.wrap(call_ + MUL_OFFSET + 0x60));
+
+            assembly {
+                call_ := add(call_, MUL_OFFSET)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40))
+            }
+        }
+    }
+
+    function modsub(uint512 a_, uint512 b_, uint512 m_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = _allocate(SHORT_ALLOCATION);
+
+            if (cmp(a_, b_) >= 0) {
+                _sub(a_, b_, r_);
+                return r_;
+            }
+
+            _add(a_, m_, r_);
+            _subFrom(r_, b_);
+        }
+    }
+
+    function modsubAssign(uint512 a_, uint512 b_, uint512 m_) internal pure {
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _subFrom(a_, b_);
+                return;
+            }
+
+            _addTo(a_, m_);
+            _subFrom(a_, b_);
+        }
+    }
+
+    function modsubAssignTo(uint512 to_, uint512 a_, uint512 b_, uint512 m_) internal pure {
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _sub(a_, b_, to_);
+                return;
+            }
+
+            _add(a_, m_, to_);
+            _subFrom(to_, b_);
+        }
+    }
+
+    function modshl1(uint512 a_, uint512 m_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = _allocate(SHORT_ALLOCATION);
+
+            _shl1(a_, r_);
+
+            if (cmp(r_, m_) >= 0) {
+                _subFrom(r_, m_);
+            }
+
+            return r_;
+        }
+    }
+
+    function modshl1AssignTo(uint512 to_, uint512 a_, uint512 m_) internal pure {
+        unchecked {
+            _shl1(a_, to_);
+
+            if (cmp(to_, m_) >= 0) {
+                _subFrom(to_, m_);
+            }
+        }
+    }
+
+    /// @dev Stores modinv into `b_` and moddiv into `a_`.
+    function moddivAssign(uint256 call_, uint512 a_, uint512 b_) internal view {
+        unchecked {
+            assembly {
+                call_ := add(call_, INV_OFFSET)
+
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40))
+            }
+
+            modmulAssign(call_ - INV_OFFSET, a_, b_);
+        }
+    }
+
+    function moddiv(
+        uint256 call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = modinv(call_, b_, m_);
+
+            _mul(a_, r_, uint512.wrap(call_ + 0x60));
+
+            assembly {
+                mstore(call_, 0x60)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xC0, call_), 0x01)
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
+        }
+    }
+
+    function modinv(uint256 call_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = _allocate(SHORT_ALLOCATION);
+
+            _sub(m_, init(2), uint512.wrap(call_ + 0xA0));
+
+            assembly {
+                mstore(call_, 0x40)
+                mstore(add(0x20, call_), 0x40)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
+        }
+    }
+
+    function _shl1(uint512 a_, uint512 r_) internal pure {
+        assembly {
+            let a1_ := mload(add(a_, 0x20))
+
+            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
+            mstore(add(r_, 0x20), shl(1, a1_))
+        }
+    }
+
+    function _add(uint512 a_, uint512 b_, uint512 r_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(r_, 0x20), sum_)
+
+            sum_ := gt(aWord_, sum_)
+            sum_ := add(sum_, add(mload(a_), mload(b_)))
+
+            mstore(r_, sum_)
+        }
+    }
+
+    function _sub(uint512 a_, uint512 b_, uint512 r_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(r_, 0x20), diff_)
+
+            diff_ := gt(diff_, aWord_)
+            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+
+            mstore(r_, diff_)
+        }
+    }
+
+    function _subFrom(uint512 a_, uint512 b_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(a_, 0x20), diff_)
+
+            diff_ := gt(diff_, aWord_)
+            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+
+            mstore(a_, diff_)
+        }
+    }
+
+    function _addTo(uint512 a_, uint512 b_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(a_, 0x20), sum_)
+
+            sum_ := gt(aWord_, sum_)
+            sum_ := add(sum_, add(mload(a_), mload(b_)))
+
+            mstore(a_, sum_)
+        }
+    }
+
+    function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
+        assembly {
+            let a0_ := mload(a_)
+            let a1_ := shr(128, mload(add(a_, 0x20)))
+            let a2_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+            let b0_ := mload(b_)
+            let b1_ := shr(128, mload(add(b_, 0x20)))
+            let b2_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+            // r5
+            let current_ := mul(a2_, b2_)
+            let r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+
+            // r4
+            current_ := shr(128, current_)
+
+            let temp_ := mul(a1_, b2_)
+            current_ := add(current_, temp_)
+            let curry_ := lt(current_, temp_)
+
+            temp_ := mul(a2_, b1_)
+            current_ := add(current_, temp_)
+            curry_ := add(curry_, lt(current_, temp_))
+
+            mstore(add(r_, 0x40), add(shl(128, current_), r0_))
+
+            // r3
+            current_ := add(shl(128, curry_), shr(128, current_))
+            curry_ := 0
+
+            temp_ := mul(a0_, b2_)
+            current_ := add(current_, temp_)
+            curry_ := lt(current_, temp_)
+
+            temp_ := mul(a1_, b1_)
+            current_ := add(current_, temp_)
+            curry_ := add(curry_, lt(current_, temp_))
+
+            temp_ := mul(a2_, b0_)
+            current_ := add(current_, temp_)
+            curry_ := add(curry_, lt(current_, temp_))
+
+            r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+
+            // r2
+            current_ := add(shl(128, curry_), shr(128, current_))
+            curry_ := 0
+
+            temp_ := mul(a0_, b1_)
+            current_ := add(current_, temp_)
+            curry_ := lt(current_, temp_)
+
+            temp_ := mul(a1_, b0_)
+            current_ := add(current_, temp_)
+            curry_ := add(curry_, lt(current_, temp_))
+
+            mstore(add(r_, 0x20), add(shl(128, current_), r0_))
+
+            // r1
+            current_ := add(shl(128, curry_), shr(128, current_))
+            current_ := add(current_, mul(a0_, b0_))
+
+            mstore(r_, current_)
+        }
+    }
+
+    function _allocate(uint256 bytes_) private pure returns (uint512 handler_) {
+        unchecked {
+            assembly {
+                handler_ := mload(0x40)
+                mstore(0x40, add(handler_, bytes_))
+            }
+
+            return handler_;
+        }
+    }
+
+    function _allocateCall(uint256 bytes_) private pure returns (uint256 handler_) {
+        unchecked {
+            assembly {
+                handler_ := mload(0x40)
+                mstore(0x40, add(handler_, bytes_))
+            }
+
+            return handler_;
+        }
+    }
+}
diff --git a/contracts/mock/libs/crypto/ECDSA384Mock.sol b/contracts/mock/libs/crypto/ECDSA384Mock.sol
index 17aea8eb..01054f1f 100644
--- a/contracts/mock/libs/crypto/ECDSA384Mock.sol
+++ b/contracts/mock/libs/crypto/ECDSA384Mock.sol
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {ECDSA384, U384} from "../../../libs/crypto/ECDSA384.sol";
+import "../../../libs/crypto/bn/U512.sol";
+import {ECDSA384} from "../../../libs/crypto/ECDSA384.sol";
 
 contract ECDSA384Mock {
     using ECDSA384 for *;
@@ -73,8 +74,8 @@ contract ECDSA384Mock {
     }
 
     function cmpMock() external pure returns (int256 cmp_) {
-        uint256 a_;
-        uint256 b_;
+        uint512 a_;
+        uint512 b_;
 
         assembly {
             a_ := mload(0x40)
@@ -86,6 +87,6 @@ contract ECDSA384Mock {
             mstore(0x40, add(b_, 0x40))
         }
 
-        return U384.cmp(a_, b_);
+        return U512.cmp(a_, b_);
     }
 }
diff --git a/hardhat.config.ts b/hardhat.config.ts
index 7bed7690..71d85f0d 100644
--- a/hardhat.config.ts
+++ b/hardhat.config.ts
@@ -61,7 +61,7 @@ const config: HardhatUserConfig = {
   gasReporter: {
     currency: "USD",
     gasPrice: 50,
-    enabled: false,
+    enabled: true,
     reportPureAndViewMethods: true,
     coinmarketcap: `${process.env.COINMARKETCAP_KEY}`,
   },
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index de29aaac..9294b951 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -168,7 +168,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe("brainpoolP384r1", () => {
+  describe.only("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =

From 9d6d09c1d33043441bc592903ecb6b032ad87f95 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Thu, 9 Jan 2025 20:04:46 +0200
Subject: [PATCH 02/42] rm comments

---
 contracts/libs/crypto/ECDSA384.sol | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 12c6581d..bc8bef8c 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -200,15 +200,6 @@ library ECDSA384 {
             uint256 mask_;
             uint256 mask1_;
             uint256 mask2_;
-            //
-            //            console.logBytes(scalar1.toBytes());
-            //            console.logBytes(scalar2.toBytes());
-            //
-            //            console.log(_getWord(scalar1, 384));
-            //            console.log(_getWord(scalar2, 384));
-            //
-            //            console.log(_getWord(scalar1, 383));
-            //            console.log(_getWord(scalar2, 383));
 
             for (uint256 bit = 3; bit <= 384; bit += 3) {
                 mask1_ = _getWord(scalar1, 384 - bit);

From c8c57db3936f4ffe69245f633d7983ac91b12293 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Wed, 15 Jan 2025 20:44:03 +0200
Subject: [PATCH 03/42] wip

---
 contracts/libs/crypto/ECDSA384.sol          | 408 +++++-----
 contracts/libs/crypto/bn/U512.sol           | 834 ++++++++++++--------
 contracts/mock/libs/crypto/ECDSA384Mock.sol |   3 +-
 3 files changed, 703 insertions(+), 542 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index bc8bef8c..60a01e45 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
+import {call, uint512} from "./bn/U512.sol";
 import {U512} from "./bn/U512.sol";
-import {uint512} from "./bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
+
 import "hardhat/console.sol";
+
 /**
  * @notice Cryptography module
  *
@@ -18,7 +20,6 @@ import "hardhat/console.sol";
  */
 library ECDSA384 {
     using MemoryUtils for *;
-    using U512 for *;
 
     /**
      * @notice 384-bit curve parameters.
@@ -69,26 +70,55 @@ library ECDSA384 {
         unchecked {
             _Inputs memory inputs_;
 
-            (inputs_.r, inputs_.s) = U512.init2(signature_);
-            (inputs_.x, inputs_.y) = U512.init2(pubKey_);
+            {
+                bytes memory lhs_ = new bytes(64);
+                bytes memory rhs_ = new bytes(64);
+
+                MemoryUtils.unsafeCopy(
+                    signature_.getDataPointer(),
+                    lhs_.getDataPointer() + 0x10,
+                    48
+                );
+                MemoryUtils.unsafeCopy(
+                    signature_.getDataPointer() + 0x30,
+                    rhs_.getDataPointer() + 0x10,
+                    48
+                );
+
+                (inputs_.r, inputs_.s) = (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
+
+                MemoryUtils.unsafeCopy(pubKey_.getDataPointer(), lhs_.getDataPointer() + 0x10, 48);
+                MemoryUtils.unsafeCopy(
+                    pubKey_.getDataPointer() + 0x30,
+                    rhs_.getDataPointer() + 0x10,
+                    48
+                );
+
+                (inputs_.x, inputs_.y) = (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
+            }
+
+            console.logBytes(U512.toBytes(inputs_.x));
+            console.logBytes(U512.toBytes(inputs_.y));
+            console.logBytes(U512.toBytes(inputs_.r));
+            console.logBytes(U512.toBytes(inputs_.s));
 
             _Parameters memory params_ = _Parameters({
-                a: curveParams_.a.init(),
-                b: curveParams_.b.init(),
-                gx: curveParams_.gx.init(),
-                gy: curveParams_.gy.init(),
-                p: curveParams_.p.init(),
-                n: curveParams_.n.init(),
-                lowSmax: curveParams_.lowSmax.init()
+                a: U512.fromBytes(curveParams_.a),
+                b: U512.fromBytes(curveParams_.b),
+                gx: U512.fromBytes(curveParams_.gx),
+                gy: U512.fromBytes(curveParams_.gy),
+                p: U512.fromBytes(curveParams_.p),
+                n: U512.fromBytes(curveParams_.n),
+                lowSmax: U512.fromBytes(curveParams_.lowSmax)
             });
 
-            uint256 call = U512.initCall(params_.p);
+            call call = U512.initCall();
 
             /// accept s only from the lower part of the curve
             if (
-                U512.eqInteger(inputs_.r, 0) ||
+                U512.eqUint256(inputs_.r, 0) ||
                 U512.cmp(inputs_.r, params_.n) >= 0 ||
-                U512.eqInteger(inputs_.s, 0) ||
+                U512.eqUint256(inputs_.s, 0) ||
                 U512.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
@@ -102,12 +132,12 @@ library ECDSA384 {
             {
                 uint256 hashedMessageLength_ = hashedMessage_.length;
 
-                if (hashedMessageLength_ < 48) {
-                    bytes memory tmp_ = new bytes(48);
+                if (hashedMessageLength_ < 64) {
+                    bytes memory tmp_ = new bytes(64);
 
                     MemoryUtils.unsafeCopy(
                         hashedMessage_.getDataPointer(),
-                        tmp_.getDataPointer() + 48 - hashedMessageLength_,
+                        tmp_.getDataPointer() + 64 - hashedMessageLength_,
                         hashedMessageLength_
                     );
 
@@ -115,17 +145,22 @@ library ECDSA384 {
                 }
             }
 
-            uint512 scalar1 = U512.moddiv(call, hashedMessage_.init(), inputs_.s, params_.n);
-            uint512 scalar2 = U512.moddiv(call, inputs_.r, inputs_.s, params_.n);
+            uint512 scalar1_ = U512.moddiv(
+                call,
+                U512.fromBytes(hashedMessage_),
+                inputs_.s,
+                params_.n
+            );
+            uint512 scalar2_ = U512.moddiv(call, inputs_.r, inputs_.s, params_.n);
 
             {
-                uint512 three = U512.init(3);
+                uint512 three_ = U512.fromUint256(3);
 
                 /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
                 uint512[2][64] memory points_ = _precomputePointsTable(
                     call,
                     params_.p,
-                    three,
+                    three_,
                     params_.a,
                     params_.gx,
                     params_.gy,
@@ -133,20 +168,20 @@ library ECDSA384 {
                     inputs_.y
                 );
 
-                (scalar1, ) = _doubleScalarMultiplication(
+                (scalar1_, ) = _doubleScalarMultiplication(
                     call,
                     params_.p,
-                    three,
+                    three_,
                     params_.a,
                     points_,
-                    scalar1,
-                    scalar2
+                    scalar1_,
+                    scalar2_
                 );
             }
 
-            U512.modAssign(call, scalar1, params_.n);
+            U512.modAssign(call, scalar1_, params_.n);
 
-            return U512.eq(scalar1, inputs_.r);
+            return U512.eq(scalar1_, inputs_.r);
         }
     }
 
@@ -154,30 +189,35 @@ library ECDSA384 {
      * @dev Check if a point in affine coordinates is on the curve.
      */
     function _isOnCurve(
-        uint256 call,
-        uint512 p,
-        uint512 a,
-        uint512 b,
-        uint512 x,
-        uint512 y
+        call call_,
+        uint512 p_,
+        uint512 a_,
+        uint512 b_,
+        uint512 x_,
+        uint512 y_
     ) private view returns (bool) {
         unchecked {
-            if (U512.eqInteger(x, 0) || U512.eq(x, p) || U512.eqInteger(y, 0) || U512.eq(y, p)) {
+            if (
+                U512.eqUint256(x_, 0) ||
+                U512.eq(x_, p_) ||
+                U512.eqUint256(y_, 0) ||
+                U512.eq(y_, p_)
+            ) {
                 return false;
             }
 
-            uint512 LHS = U512.modexp(call, y, 2);
-            uint512 RHS = U512.modexp(call, x, 3);
+            uint512 lhs_ = U512.modexp(call_, y_, U512.fromUint256(2), p_);
+            uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
 
-            if (!U512.eqInteger(a, 0)) {
-                RHS = U512.modadd(RHS, U512.modmul(call, x, a), p); // x^3 + a*x
+            if (!U512.eqUint256(a_, 0)) {
+                rhs_ = U512.modadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
             }
 
-            if (!U512.eqInteger(b, 0)) {
-                RHS = U512.modadd(RHS, b, p); // x^3 + a*x + b
+            if (!U512.eqUint256(b_, 0)) {
+                rhs_ = U512.modadd(call_, rhs_, b_, p_); // x^3 + a*x + b
             }
 
-            return U512.eq(LHS, RHS);
+            return U512.eq(lhs_, rhs_);
         }
     }
 
@@ -185,42 +225,43 @@ library ECDSA384 {
      * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
      */
     function _doubleScalarMultiplication(
-        uint256 call,
-        uint512 p,
-        uint512 three,
-        uint512 a,
-        uint512[2][64] memory points,
-        uint512 scalar1,
-        uint512 scalar2
-    ) private view returns (uint512 x, uint512 y) {
+        call call_,
+        uint512 p_,
+        uint512 three_,
+        uint512 a_,
+        uint512[2][64] memory points_,
+        uint512 scalar1_,
+        uint512 scalar2_
+    ) private view returns (uint512 x_, uint512 y_) {
         unchecked {
-            x = U512.init();
-            y = U512.init();
-
             uint256 mask_;
             uint256 mask1_;
             uint256 mask2_;
 
             for (uint256 bit = 3; bit <= 384; bit += 3) {
-                mask1_ = _getWord(scalar1, 384 - bit);
-                mask2_ = _getWord(scalar2, 384 - bit);
+                mask1_ = _getWord(scalar1_, 384 - bit);
+                mask2_ = _getWord(scalar2_, 384 - bit);
 
                 mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
-                    (x, y) = _twice3Affine(call, p, three, a, x, y);
-                    (x, y) = _addAffine(
-                        call,
-                        p,
-                        three,
-                        a,
-                        points[mask_][0],
-                        points[mask_][1],
-                        x,
-                        y
+                    (x_, y_) = _twiceAffine(call_, p_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, three_, a_, x_, y_);
+                    (x_, y_) = _addAffine(
+                        call_,
+                        p_,
+                        three_,
+                        a_,
+                        points_[mask_][0],
+                        points_[mask_][1],
+                        x_,
+                        y_
                     );
                 }
             }
+
+            return (x_, y_);
         }
     }
 
@@ -247,117 +288,36 @@ library ECDSA384 {
      * @dev Double an elliptic curve point in affine coordinates.
      */
     function _twiceAffine(
-        uint256 call,
-        uint512 p,
-        uint512 three,
-        uint512 a,
-        uint512 x1,
-        uint512 y1
-    ) private view returns (uint512 x2, uint512 y2) {
+        call call_,
+        uint512 p_,
+        uint512 three_,
+        uint512 a_,
+        uint512 x1_,
+        uint512 y1_
+    ) private view returns (uint512 x2_, uint512 y2_) {
         unchecked {
-            x2 = U512.init();
-            y2 = U512.init();
-
-            if (x1.isNull()) {
-                return (U512.init(), U512.init());
+            if (U512.isNull(x1_)) {
+                return (x2_, y2_);
             }
 
-            if (U512.eqInteger(y1, 0)) {
-                return (U512.init(), U512.init());
+            if (U512.eqUint256(y1_, 0)) {
+                return (x2_, y2_);
             }
 
-            uint512 m1 = U512.modexp(call, x1, 2);
-            U512.modmulAssign(call, m1, three);
-            U512.modaddAssign(m1, a, p);
+            uint512 m1_ = U512.modexp(call_, x1_, U512.fromUint256(2), p_);
+            U512.modmulAssign(call_, m1_, three_, p_);
+            U512.modaddAssign(call_, m1_, a_, p_);
 
-            uint512 m2 = U512.modshl1(y1, p);
-            U512.moddivAssign(call, m1, m2);
+            uint512 m2_ = U512.modmul(call_, y1_, U512.fromUint256(2), p_);
+            U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x2 = U512.modexp(call, m1, 2);
-            U512.modsubAssign(x2, x1, p);
-            U512.modsubAssign(x2, x1, p);
+            x2_ = U512.modexp(call_, m1_, U512.fromUint256(2), p_);
+            U512.modsubAssign(call_, x2_, x1_, p_);
+            U512.modsubAssign(call_, x2_, x1_, p_);
 
-            y2 = U512.modsub(x1, x2, p);
-            U512.modmulAssign(call, y2, m1);
-            U512.modsubAssign(y2, y1, p);
-        }
-    }
-
-    /**
-     * @dev Doubles an elliptic curve point 3 times in affine coordinates.
-     */
-    function _twice3Affine(
-        uint256 call,
-        uint512 p,
-        uint512 three,
-        uint512 a,
-        uint512 x1,
-        uint512 y1
-    ) private view returns (uint512 x2, uint512 y2) {
-        unchecked {
-            x2 = U512.init();
-            y2 = U512.init();
-
-            if (x1.isNull()) {
-                return (U512.init(), U512.init());
-            }
-
-            if (U512.eqInteger(y1, 0)) {
-                return (U512.init(), U512.init());
-            }
-
-            uint512 m1 = U512.modexp(call, x1, 2);
-            U512.modmulAssign(call, m1, three);
-            U512.modaddAssign(m1, a, p);
-
-            uint512 m2 = U512.modshl1(y1, p);
-            U512.moddivAssign(call, m1, m2);
-
-            x2 = U512.modexp(call, m1, 2);
-            U512.modsubAssign(x2, x1, p);
-            U512.modsubAssign(x2, x1, p);
-
-            y2 = U512.modsub(x1, x2, p);
-            U512.modmulAssign(call, y2, m1);
-            U512.modsubAssign(y2, y1, p);
-
-            if (U512.eqInteger(y2, 0)) {
-                return (U512.init(), U512.init());
-            }
-
-            U512.modexpAssignTo(call, m1, x2, 2);
-            U512.modmulAssign(call, m1, three);
-            U512.modaddAssign(m1, a, p);
-
-            U512.modshl1AssignTo(m2, y2, p);
-            U512.moddivAssign(call, m1, m2);
-
-            U512.modexpAssignTo(call, x1, m1, 2);
-            U512.modsubAssign(x1, x2, p);
-            U512.modsubAssign(x1, x2, p);
-
-            U512.modsubAssignTo(y1, x2, x1, p);
-            U512.modmulAssign(call, y1, m1);
-            U512.modsubAssign(y1, y2, p);
-
-            if (U512.eqInteger(y1, 0)) {
-                return (U512.init(), U512.init());
-            }
-
-            U512.modexpAssignTo(call, m1, x1, 2);
-            U512.modmulAssign(call, m1, three);
-            U512.modaddAssign(m1, a, p);
-
-            U512.modshl1AssignTo(m2, y1, p);
-            U512.moddivAssign(call, m1, m2);
-
-            U512.modexpAssignTo(call, x2, m1, 2);
-            U512.modsubAssign(x2, x1, p);
-            U512.modsubAssign(x2, x1, p);
-
-            U512.modsubAssignTo(y2, x1, x2, p);
-            U512.modmulAssign(call, y2, m1);
-            U512.modsubAssign(y2, y1, p);
+            y2_ = U512.modsub(call_, x1_, x2_, p_);
+            U512.modmulAssign(call_, y2_, m1_, p_);
+            U512.modsubAssign(call_, y2_, y1_, p_);
         }
     }
 
@@ -365,63 +325,63 @@ library ECDSA384 {
      * @dev Add two elliptic curve points in affine coordinates.
      */
     function _addAffine(
-        uint256 call,
-        uint512 p,
-        uint512 three,
-        uint512 a,
-        uint512 x1,
-        uint512 y1,
-        uint512 x2,
-        uint512 y2
+        call call_,
+        uint512 p_,
+        uint512 three_,
+        uint512 a_,
+        uint512 x1_,
+        uint512 y1_,
+        uint512 x2_,
+        uint512 y2_
     ) private view returns (uint512 x3, uint512 y3) {
         unchecked {
-            x3 = U512.init();
-            y3 = U512.init();
-
-            if (x1.isNull() || x2.isNull()) {
-                if (x1.isNull() && x2.isNull()) {
-                    return (U512.init(), U512.init());
+            if (U512.isNull(x1_) || U512.isNull(x2_)) {
+                if (U512.isNull(x1_) && U512.isNull(x2_)) {
+                    return (x3, y3);
                 }
 
-                return x1.isNull() ? (x2.copy(), y2.copy()) : (x1.copy(), y1.copy());
+                return
+                    U512.isNull(x1_)
+                        ? (U512.copy(x2_), U512.copy(y2_))
+                        : (U512.copy(x1_), U512.copy(y1_));
             }
 
-            if (U512.eq(x1, x2)) {
-                if (U512.eq(y1, y2)) {
-                    return _twiceAffine(call, p, three, a, x1, y1);
+            if (U512.eq(x1_, x2_)) {
+                if (U512.eq(y1_, y2_)) {
+                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
                 }
 
-                return (U512.init(), U512.init());
+                return (x3, y3);
             }
 
-            uint512 m1 = U512.modsub(y1, y2, p);
-            uint512 m2 = U512.modsub(x1, x2, p);
+            uint512 m1_ = U512.modsub(call_, y1_, y2_, p_);
+            uint512 m2_ = U512.modsub(call_, x1_, x2_, p_);
 
-            U512.moddivAssign(call, m1, m2);
+            U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x3 = U512.modexp(call, m1, 2);
-            U512.modsubAssign(x3, x1, p);
-            U512.modsubAssign(x3, x2, p);
+            x3 = U512.modexp(call_, m1_, U512.fromUint256(2), p_);
+            U512.modsubAssign(call_, x3, x1_, p_);
+            U512.modsubAssign(call_, x3, x2_, p_);
 
-            y3 = U512.modsub(x1, x3, p);
-            U512.modmulAssign(call, y3, m1);
-            U512.modsubAssign(y3, y1, p);
+            y3 = U512.modsub(call_, x1_, x3, p_);
+            U512.modmulAssign(call_, y3, m1_, p_);
+            U512.modsubAssign(call_, y3, y1_, p_);
         }
     }
 
     function _precomputePointsTable(
-        uint256 call,
-        uint512 p,
-        uint512 three,
-        uint512 a,
-        uint512 gx,
-        uint512 gy,
-        uint512 hx,
-        uint512 hy
+        call call_,
+        uint512 p_,
+        uint512 three_,
+        uint512 a_,
+        uint512 gx_,
+        uint512 gy_,
+        uint512 hx_,
+        uint512 hy_
     ) private view returns (uint512[2][64] memory points_) {
         unchecked {
-            (points_[0x01][0], points_[0x01][1]) = (hx.copy(), hy.copy());
-            (points_[0x08][0], points_[0x08][1]) = (gx.copy(), gy.copy());
+            (points_[0x01][0], points_[0x01][1]) = (U512.copy(hx_), U512.copy(hy_));
+            (points_[0x08][0], points_[0x08][1]) = (U512.copy(gx_), U512.copy(gy_));
 
             for (uint256 i = 0; i < 8; ++i) {
                 for (uint256 j = 0; j < 8; ++j) {
@@ -432,34 +392,36 @@ library ECDSA384 {
                     uint256 maskTo = (i << 3) | j;
 
                     if (i != 0) {
-                        uint256 maskFrom = ((i - 1) << 3) | j;
+                        uint256 maskFrom_ = ((i - 1) << 3) | j;
 
                         (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
-                            call,
-                            p,
-                            three,
-                            a,
-                            points_[maskFrom][0],
-                            points_[maskFrom][1],
-                            gx,
-                            gy
+                            call_,
+                            p_,
+                            three_,
+                            a_,
+                            points_[maskFrom_][0],
+                            points_[maskFrom_][1],
+                            gx_,
+                            gy_
                         );
                     } else {
-                        uint256 maskFrom = (i << 3) | (j - 1);
+                        uint256 maskFrom_ = (i << 3) | (j - 1);
 
                         (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
-                            call,
-                            p,
-                            three,
-                            a,
-                            points_[maskFrom][0],
-                            points_[maskFrom][1],
-                            hx,
-                            hy
+                            call_,
+                            p_,
+                            three_,
+                            a_,
+                            points_[maskFrom_][0],
+                            points_[maskFrom_][1],
+                            hx_,
+                            hy_
                         );
                     }
                 }
             }
+
+            return points_;
         }
     }
 }
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 260aef0b..1e7ca702 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -2,163 +2,99 @@
 pragma solidity ^0.8.4;
 
 type uint512 is uint256;
+type call is uint256;
 
 /**
- * @notice Low-level utility library that implements unsigned 512-bit arithmetics.
+ * @notice Low-level library that implements unsigned 512-bit arithmetics.
  */
 library U512 {
-    uint256 private constant SHORT_ALLOCATION = 64;
-    uint256 private constant LONG_ALLOCATION = 64;
+    uint256 private constant _UINT512_ALLOCATION = 64;
+    uint256 private constant _BYTES_ALLOCATION = 96;
+    uint256 private constant _CALL_ALLOCATION = 384;
 
-    uint256 private constant CALL_ALLOCATION = 4 * 288;
-
-    uint256 private constant MUL_OFFSET = 288;
-    uint256 private constant EXP_OFFSET = 2 * 288;
-    uint256 private constant INV_OFFSET = 3 * 288;
-
-    function toBytes(uint512 from_) internal pure returns (bytes memory bytes_) {
+    function initCall() internal pure returns (call call_) {
         unchecked {
-            uint512 handler_ = _allocate(LONG_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0x40)
-                mstore(add(handler_, 0x20), mload(from_))
-                mstore(add(handler_, 0x40), mload(add(from_, 0x20)))
-
-                bytes_ := handler_
-            }
+            call_ = call.wrap(_allocate(_CALL_ALLOCATION));
         }
     }
 
-    function isNull(uint512 handler) internal pure returns (bool isNull_) {
+    function fromUint256(uint256 u256_) internal pure returns (uint512 u512_) {
         unchecked {
-            assembly {
-                isNull_ := iszero(handler)
-            }
-        }
-    }
+            u512_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-    function init() internal pure returns (uint512 handler_) {
-        unchecked {
             assembly {
-                handler_ := 0
+                mstore(u512_, 0x00)
+                mstore(add(u512_, 0x20), u256_)
             }
         }
     }
 
-    function init(uint256 from_) internal pure returns (uint512 handler_) {
+    function fromBytes(bytes memory bytes_) internal pure returns (uint512 u512_) {
         unchecked {
-            handler_ = _allocate(SHORT_ALLOCATION);
+            require(bytes_.length == 64, "U384: not 64 bytes");
+
+            u512_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
             assembly {
-                mstore(handler_, 0x00)
-                mstore(add(0x20, handler_), from_)
+                mstore(u512_, mload(add(bytes_, 0x20)))
+                mstore(add(u512_, 0x20), mload(add(bytes_, 0x40)))
             }
-
-            return handler_;
         }
     }
 
-    function init(bytes memory from_) internal pure returns (uint512 handler_) {
+    function copy(uint512 u512_) internal pure returns (uint512 u512Copy_) {
         unchecked {
-            require(from_.length == 48, "U384: not 384");
-
-            handler_ = _allocate(SHORT_ALLOCATION);
+            u512Copy_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
             assembly {
-                mstore(handler_, 0x00)
-                mstore(add(handler_, 0x10), mload(add(from_, 0x20)))
-                mstore(add(handler_, 0x20), mload(add(from_, 0x30)))
+                mstore(u512Copy_, mload(u512_))
+                mstore(add(u512Copy_, 0x20), mload(add(u512_, 0x20)))
             }
-
-            return handler_;
         }
     }
 
-    function init2(
-        bytes memory from2_
-    ) internal pure returns (uint512 handler1_, uint512 handler2_) {
+    function toBytes(uint512 u512_) internal pure returns (bytes memory bytes_) {
         unchecked {
-            require(from2_.length == 96, "U384: not 768");
-
-            handler1_ = _allocate(SHORT_ALLOCATION);
-            handler2_ = _allocate(SHORT_ALLOCATION);
+            uint256 handler_ = _allocate(_BYTES_ALLOCATION);
 
             assembly {
-                mstore(handler1_, 0x00)
-                mstore(add(handler1_, 0x10), mload(add(from2_, 0x20)))
-                mstore(add(handler1_, 0x20), mload(add(from2_, 0x30)))
+                mstore(handler_, 0x40)
+                mstore(add(handler_, 0x20), mload(u512_))
+                mstore(add(handler_, 0x40), mload(add(u512_, 0x20)))
 
-                mstore(handler2_, 0x00)
-                mstore(add(handler2_, 0x10), mload(add(from2_, 0x50)))
-                mstore(add(handler2_, 0x20), mload(add(from2_, 0x60)))
+                bytes_ := handler_
             }
-
-            return (handler1_, handler2_);
         }
     }
 
-    function initCall(uint512 m_) internal pure returns (uint256 handler_) {
+    function isNull(uint512 u512_) internal pure returns (bool isNull_) {
         unchecked {
-            handler_ = _allocateCall(CALL_ALLOCATION);
-
-            _sub(m_, init(2), uint512.wrap(handler_ + INV_OFFSET + 0xA0));
-
             assembly {
-                let call_ := add(handler_, MUL_OFFSET)
-
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, EXP_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), mload(m_))
-                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, INV_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+                isNull_ := iszero(u512_)
             }
         }
     }
 
-    function copy(uint512 handler_) internal pure returns (uint512 handlerCopy_) {
+    function eq(uint512 a_, uint512 b_) internal pure returns (bool eq_) {
         unchecked {
-            handlerCopy_ = _allocate(SHORT_ALLOCATION);
-
             assembly {
-                mstore(handlerCopy_, mload(handler_))
-                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20)))
+                eq_ := and(
+                    eq(mload(a_), mload(b_)),
+                    eq(mload(add(a_, 0x20)), mload(add(b_, 0x20)))
+                )
             }
-
-            return handlerCopy_;
-        }
-    }
-
-    function eq(uint512 a_, uint512 b_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
         }
     }
 
-    function eqInteger(uint512 a_, uint256 bInteger_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_))
+    function eqUint256(uint512 a_, uint256 u256_) internal pure returns (bool eq_) {
+        unchecked {
+            assembly {
+                eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), u256_))
+            }
         }
     }
 
-    function cmp(uint512 a_, uint512 b_) internal pure returns (int256 cmp_) {
+    function cmp(uint512 a_, uint512 b_) internal pure returns (int256) {
         unchecked {
             uint256 aWord_;
             uint256 bWord_;
@@ -188,229 +124,320 @@ library U512 {
             if (aWord_ < bWord_) {
                 return -1;
             }
+
+            return 0;
+        }
+    }
+
+    function mod(call call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _mod(call_, a_, m_, r_);
+        }
+    }
+
+    function modAssign(call call_, uint512 a_, uint512 m_) internal view {
+        unchecked {
+            _mod(call_, a_, m_, a_);
+        }
+    }
+
+    function modAssignTo(call call_, uint512 a_, uint512 m_, uint512 to_) internal view {
+        unchecked {
+            _mod(call_, a_, m_, to_);
+        }
+    }
+
+    function modinv(call call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _modinv(call_, a_, m_, r_);
         }
     }
 
-    function modAssign(uint256 call_, uint512 a_, uint512 m_) internal view {
-        assembly {
-            mstore(call_, 0x40)
-            mstore(add(0x20, call_), 0x20)
-            mstore(add(0x40, call_), 0x40)
-            mstore(add(0x60, call_), mload(a_))
-            mstore(add(0x80, call_), mload(add(a_, 0x20)))
-            mstore(add(0xA0, call_), 0x01)
-            mstore(add(0xC0, call_), mload(m_))
-            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+    function modinvAssign(call call_, uint512 a_, uint512 m_) internal view {
+        unchecked {
+            _modinv(call_, a_, m_, a_);
+        }
+    }
 
-            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40))
+    function modinvAssignTo(call call_, uint512 a_, uint512 m_, uint512 to_) internal view {
+        unchecked {
+            _modinv(call_, a_, m_, to_);
         }
     }
 
     function modexp(
-        uint256 call_,
+        call call_,
         uint512 b_,
-        uint256 eInteger_
+        uint512 e_,
+        uint512 m_
     ) internal view returns (uint512 r_) {
         unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            assembly {
-                call_ := add(call_, EXP_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xA0, call_), eInteger_)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
-            }
+            _modexp(call_, b_, e_, m_, r_);
+        }
+    }
 
-            return r_;
+    function modexpAssign(call call_, uint512 b_, uint512 e_, uint512 m_) internal view {
+        unchecked {
+            _modexp(call_, b_, e_, m_, b_);
         }
     }
 
     function modexpAssignTo(
-        uint256 call_,
-        uint512 to_,
+        call call_,
         uint512 b_,
-        uint256 eInteger_
+        uint512 e_,
+        uint512 m_,
+        uint512 to_
     ) internal view {
-        assembly {
-            call_ := add(call_, EXP_OFFSET)
+        unchecked {
+            _modexp(call_, b_, e_, m_, to_);
+        }
+    }
 
-            mstore(add(0x60, call_), mload(b_))
-            mstore(add(0x80, call_), mload(add(b_, 0x20)))
-            mstore(add(0xA0, call_), eInteger_)
+    function modadd(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40))
+            _modadd(call_, a_, b_, m_, r_);
         }
     }
 
-    function modadd(uint512 a_, uint512 b_, uint512 m_) internal pure returns (uint512 r_) {
+    function modaddAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
+            _modadd(call_, a_, b_, m_, a_);
+        }
+    }
 
-            _add(a_, b_, r_);
+    function modaddAssignTo(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_,
+        uint512 to_
+    ) internal view {
+        unchecked {
+            _modadd(call_, a_, b_, m_, to_);
+        }
+    }
 
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
+    function add(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            return r_;
+            _add(a_, b_, r_);
         }
     }
 
-    function modaddAssign(uint512 a_, uint512 b_, uint512 m_) internal pure {
+    function addAssign(uint512 a_, uint512 b_) internal pure {
         unchecked {
-            _addTo(a_, b_);
-
-            if (cmp(a_, m_) >= 0) {
-                return _subFrom(a_, m_);
-            }
+            _add(a_, b_, a_);
         }
     }
 
-    function modmul(uint256 call_, uint512 a_, uint512 b_) internal view returns (uint512 r_) {
+    function addAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
         unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
+            _add(a_, b_, to_);
+        }
+    }
 
-            _mul(a_, b_, uint512.wrap(call_ + MUL_OFFSET + 0x60));
+    function modsub(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            assembly {
-                call_ := add(call_, MUL_OFFSET)
+            _modsub(call_, a_, b_, m_, r_);
+        }
+    }
 
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
+    function modsubAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
+        unchecked {
+            _modsub(call_, a_, b_, m_, a_);
+        }
+    }
 
-            return r_;
+    function modsubAssignTo(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_,
+        uint512 to_
+    ) internal view {
+        unchecked {
+            _modsub(call_, a_, b_, m_, to_);
         }
     }
 
-    function modmulAssign(uint256 call_, uint512 a_, uint512 b_) internal view {
+    function sub(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
         unchecked {
-            _mul(a_, b_, uint512.wrap(call_ + MUL_OFFSET + 0x60));
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            assembly {
-                call_ := add(call_, MUL_OFFSET)
+            _sub(a_, b_, r_);
+        }
+    }
 
-                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40))
-            }
+    function subAssign(uint512 a_, uint512 b_) internal pure {
+        unchecked {
+            _sub(a_, b_, a_);
         }
     }
 
-    function modsub(uint512 a_, uint512 b_, uint512 m_) internal pure returns (uint512 r_) {
+    function subAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
         unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
+            _sub(a_, b_, to_);
+        }
+    }
 
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, r_);
-                return r_;
-            }
+    function modmul(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            _add(a_, m_, r_);
-            _subFrom(r_, b_);
+            _modmul(call_, a_, b_, m_, r_);
         }
     }
 
-    function modsubAssign(uint512 a_, uint512 b_, uint512 m_) internal pure {
+    function modmulAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _subFrom(a_, b_);
-                return;
-            }
+            _modmul(call_, a_, b_, m_, a_);
+        }
+    }
 
-            _addTo(a_, m_);
-            _subFrom(a_, b_);
+    function modmulAssignTo(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_,
+        uint512 to_
+    ) internal view {
+        unchecked {
+            _modmul(call_, a_, b_, m_, to_);
         }
     }
 
-    function modsubAssignTo(uint512 to_, uint512 a_, uint512 b_, uint512 m_) internal pure {
+    function mul(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
         unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, to_);
-                return;
-            }
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            _add(a_, m_, to_);
-            _subFrom(to_, b_);
+            _mul(a_, b_, r_);
         }
     }
 
-    function modshl1(uint512 a_, uint512 m_) internal pure returns (uint512 r_) {
+    function mulAssign(uint512 a_, uint512 b_) internal pure {
         unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
+            _mul(a_, b_, a_);
+        }
+    }
 
-            _shl1(a_, r_);
+    function mulAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
+        unchecked {
+            _mul(a_, b_, to_);
+        }
+    }
 
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
+    function moddiv(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
-            return r_;
+            _moddiv(call_, a_, b_, m_, r_);
         }
     }
 
-    function modshl1AssignTo(uint512 to_, uint512 a_, uint512 m_) internal pure {
+    function moddivAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
-            _shl1(a_, to_);
+            _moddiv(call_, a_, b_, m_, a_);
+        }
+    }
 
-            if (cmp(to_, m_) >= 0) {
-                _subFrom(to_, m_);
-            }
+    function moddivAssignTo(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_,
+        uint512 to_
+    ) internal view {
+        unchecked {
+            _moddiv(call_, a_, b_, m_, to_);
         }
     }
 
-    /// @dev Stores modinv into `b_` and moddiv into `a_`.
-    function moddivAssign(uint256 call_, uint512 a_, uint512 b_) internal view {
+    function _mod(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
-                call_ := add(call_, INV_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(call_, 0x40)
+                mstore(add(call_, 0x20), 0x20)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0x60), mload(a_))
+                mstore(add(call_, 0x80), mload(add(a_, 0x20)))
+                mstore(add(call_, 0xA0), 0x01)
+                mstore(add(call_, 0xC0), mload(m_))
+                mstore(add(call_, 0xE0), mload(add(m_, 0x20)))
 
-                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40))
+                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
             }
-
-            modmulAssign(call_ - INV_OFFSET, a_, b_);
         }
     }
 
-    function moddiv(
-        uint256 call_,
-        uint512 a_,
-        uint512 b_,
-        uint512 m_
-    ) internal view returns (uint512 r_) {
+    function _modexp(call call_, uint512 a_, uint512 e_, uint512 m_, uint512 r_) private view {
         unchecked {
-            r_ = modinv(call_, b_, m_);
-
-            _mul(a_, r_, uint512.wrap(call_ + 0x60));
-
             assembly {
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+                mstore(call_, 0x40)
+                mstore(add(call_, 0x20), 0x40)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0x60), mload(a_))
+                mstore(add(call_, 0x80), mload(add(a_, 0x20)))
+                mstore(add(call_, 0xA0), mload(e_))
+                mstore(add(call_, 0xC0), mload(add(e_, 0x20)))
+                mstore(add(call_, 0xE0), mload(m_))
+                mstore(add(call_, 0x0100), mload(add(m_, 0x20))) // modulus low word, last 32 bytes of the 0x120 input
 
                 pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
             }
         }
     }
 
-    function modinv(uint256 call_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
+    function _modinv(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
-            r_ = _allocate(SHORT_ALLOCATION);
+            uint512 buffer_ = _buffer(call_);
+
+            assembly {
+                mstore(buffer_, 0x00)
+                mstore(add(buffer_, 0x20), 0x02)
+            }
 
-            _sub(m_, init(2), uint512.wrap(call_ + 0xA0));
+            _sub(m_, buffer_, buffer_);
 
             assembly {
                 mstore(call_, 0x40)
                 mstore(add(0x20, call_), 0x40)
                 mstore(add(0x40, call_), 0x40)
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0x60, call_), mload(a_))
+                mstore(add(0x80, call_), mload(add(a_, 0x20)))
+                mstore(add(0xA0, call_), mload(buffer_))
+                mstore(add(0xC0, call_), mload(add(buffer_, 0x20)))
                 mstore(add(0xE0, call_), mload(m_))
                 mstore(add(0x0100, call_), mload(add(m_, 0x20)))
 
@@ -419,157 +446,328 @@ library U512 {
         }
     }
 
-    function _shl1(uint512 a_, uint512 r_) internal pure {
-        assembly {
-            let a1_ := mload(add(a_, 0x20))
+    function _add(uint512 a_, uint512 b_, uint512 r_) private pure {
+        unchecked {
+            assembly {
+                let aWord_ := mload(add(a_, 0x20))
+                let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+                mstore(r_, sum_)
 
-            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
-            mstore(add(r_, 0x20), shl(1, a1_))
+                sum_ := gt(aWord_, sum_)
+                sum_ := add(sum_, add(aWord_, mload(b_)))
+
+                mstore(add(r_, 0x20), sum_)
+            }
         }
     }
 
-    function _add(uint512 a_, uint512 b_, uint512 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+    function _modadd(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
+        unchecked {
+            assembly {
+                let aWord_ := mload(add(a_, 0x20))
+                let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+                mstore(add(call_, 0xA0), sum_)
 
-            mstore(add(r_, 0x20), sum_)
+                sum_ := gt(aWord_, sum_)
+                sum_ := add(sum_, add(aWord_, mload(b_)))
 
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
+                mstore(add(call_, 0x80), sum_)
+                mstore(add(call_, 0x60), gt(mload(a_), sum_))
+
+                mstore(call_, 0x40)
+                mstore(add(call_, 0x20), 0x20)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0xC0), 0x01)
+                mstore(add(call_, 0xE0), mload(m_))
+                mstore(add(call_, 0x0100), mload(add(m_, 0x20)))
 
-            mstore(r_, sum_)
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
         }
     }
 
     function _sub(uint512 a_, uint512 b_, uint512 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+        unchecked {
+            assembly {
+                let aWord_ := mload(add(a_, 0x20))
+                let diff_ := sub(aWord_, mload(add(b_, 0x20)))
 
-            mstore(add(r_, 0x20), diff_)
+                mstore(add(r_, 0x20), diff_)
 
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+                diff_ := gt(diff_, aWord_)
+                diff_ := sub(sub(mload(a_), mload(b_)), diff_)
 
-            mstore(r_, diff_)
+                mstore(r_, diff_)
+            }
         }
     }
 
-    function _subFrom(uint512 a_, uint512 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+    function _modsub(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
+        unchecked {
+            int cmp_ = cmp(a_, b_);
 
-            mstore(add(a_, 0x20), diff_)
+            if (cmp_ >= 0) {
+                _sub(a_, b_, r_);
+            } else {
+                _sub(b_, a_, r_);
+            }
 
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+            assembly {
+                mstore(call_, 0x40)
+                mstore(add(call_, 0x20), 0x20)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0x60), mload(r_))
+                mstore(add(call_, 0x80), mload(add(r_, 0x20)))
+                mstore(add(call_, 0xA0), 0x01)
+                mstore(add(call_, 0xC0), mload(m_))
+                mstore(add(call_, 0xE0), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
+            }
 
-            mstore(a_, diff_)
+            if (cmp_ < 0) {
+                _sub(m_, r_, r_);
+            }
         }
     }
 
-    function _addTo(uint512 a_, uint512 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+    function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
+        unchecked {
+            assembly {
+                let a0_ := shr(128, mload(a_))
+                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
+                let a2_ := shr(128, mload(add(a_, 0x20)))
+                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+                let b0_ := shr(128, mload(b_))
+                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
+                let b2_ := shr(128, mload(add(b_, 0x20)))
+                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+                // r7
+                let current_ := mul(a3_, b3_)
+                let ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+
+                // r6
+                current_ := shr(128, current_)
+
+                let temp_ := mul(a3_, b2_)
+                current_ := add(current_, temp_)
+                let curry_ := lt(current_, temp_)
 
-            mstore(add(a_, 0x20), sum_)
+                temp_ := mul(a2_, b3_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
+                mstore(add(r_, 0x20), add(shl(128, current_), ri_))
 
-            mstore(a_, sum_)
+                // r5
+                current_ := add(shl(128, curry_), shr(128, current_))
+                curry_ := 0
+
+                temp_ := mul(a3_, b1_)
+                current_ := add(current_, temp_)
+                curry_ := lt(current_, temp_)
+
+                temp_ := mul(a2_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                temp_ := mul(a1_, b3_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+
+                // r4
+                current_ := add(shl(128, curry_), shr(128, current_))
+                curry_ := 0
+
+                temp_ := mul(a3_, b0_)
+                current_ := add(current_, temp_)
+                curry_ := lt(current_, temp_)
+
+                temp_ := mul(a2_, b1_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                temp_ := mul(a1_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                temp_ := mul(a0_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                mstore(r_, add(shl(128, current_), ri_))
+            }
         }
     }
 
-    function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
-        assembly {
-            let a0_ := mload(a_)
-            let a1_ := shr(128, mload(add(a_, 0x20)))
-            let a2_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+    function _modmulOverflow(call call_, uint512 a_, uint512 b_) private pure {
+        unchecked {
+            assembly {
+                let a0_ := shr(128, mload(a_))
+                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
+                let a2_ := shr(128, mload(add(a_, 0x20)))
+                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+                let b0_ := shr(128, mload(b_))
+                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
+                let b2_ := shr(128, mload(add(b_, 0x20)))
+                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+                // r7
+                let current_ := mul(a3_, b3_)
+                let ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
-            let b0_ := mload(b_)
-            let b1_ := shr(128, mload(add(b_, 0x20)))
-            let b2_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+                // r6
+                current_ := shr(128, current_)
 
-            // r5
-            let current_ := mul(a2_, b2_)
-            let r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+                let temp_ := mul(a3_, b2_)
+                current_ := add(current_, temp_)
+                let curry_ := lt(current_, temp_)
 
-            // r4
-            current_ := shr(128, current_)
+                temp_ := mul(a2_, b3_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            let temp_ := mul(a1_, b2_)
-            current_ := add(current_, temp_)
-            let curry_ := lt(current_, temp_)
+                mstore(add(call_, 0xC0), add(shl(128, current_), ri_))
 
-            temp_ := mul(a2_, b1_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
+                // r5
+                current_ := add(shl(128, curry_), shr(128, current_))
+                curry_ := 0
 
-            mstore(add(r_, 0x40), add(shl(128, current_), r0_))
+                temp_ := mul(a3_, b1_)
+                current_ := add(current_, temp_)
+                curry_ := lt(current_, temp_)
 
-            // r3
-            current_ := add(shl(128, curry_), shr(128, current_))
-            curry_ := 0
+                temp_ := mul(a2_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            temp_ := mul(a0_, b2_)
-            current_ := add(current_, temp_)
-            curry_ := lt(current_, temp_)
+                temp_ := mul(a1_, b3_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            temp_ := mul(a1_, b1_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
+                ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
-            temp_ := mul(a2_, b0_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
+                // r4
+                current_ := add(shl(128, curry_), shr(128, current_))
+                curry_ := 0
 
-            r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+                temp_ := mul(a3_, b0_)
+                current_ := add(current_, temp_)
+                curry_ := lt(current_, temp_)
 
-            // r2
-            current_ := add(shl(128, curry_), shr(128, current_))
-            curry_ := 0
+                temp_ := mul(a2_, b1_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            temp_ := mul(a0_, b1_)
-            current_ := add(current_, temp_)
-            curry_ := lt(current_, temp_)
+                temp_ := mul(a1_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            temp_ := mul(a1_, b0_)
-            current_ := add(current_, temp_)
-            curry_ := add(curry_, lt(current_, temp_))
+                temp_ := mul(a0_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
 
-            mstore(add(r_, 0x20), add(shl(128, current_), r0_))
+                mstore(add(call_, 0xA0), add(shl(128, current_), ri_))
 
-            // r1
-            current_ := add(shl(128, curry_), shr(128, current_))
-            current_ := add(current_, mul(a0_, b0_))
+                // r3
+                current_ := add(shl(128, curry_), shr(128, current_))
+                curry_ := 0
 
-            mstore(r_, current_)
+                temp_ := mul(a2_, b0_)
+                current_ := add(current_, temp_)
+                curry_ := lt(current_, temp_)
+
+                temp_ := mul(a1_, b1_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                temp_ := mul(a0_, b2_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+
+                // r2
+                current_ := add(shl(128, curry_), shr(128, current_))
+                curry_ := 0
+
+                temp_ := mul(a1_, b0_)
+                current_ := add(current_, temp_)
+                curry_ := lt(current_, temp_)
+
+                temp_ := mul(a0_, b1_)
+                current_ := add(current_, temp_)
+                curry_ := add(curry_, lt(current_, temp_))
+
+                mstore(add(call_, 0x80), add(shl(128, current_), ri_))
+
+                // r1
+                current_ := add(shl(128, curry_), shr(128, current_))
+                current_ := add(current_, mul(a0_, b0_))
+
+                mstore(add(call_, 0x60), current_)
+            }
         }
     }
 
-    function _allocate(uint256 bytes_) private pure returns (uint512 handler_) {
+    function _modmul(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
+            _modmulOverflow(call_, a_, b_);
+
             assembly {
-                handler_ := mload(0x40)
-                mstore(0x40, add(handler_, bytes_))
+                mstore(call_, 0x40)
+                mstore(add(call_, 0x20), 0x20)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0xE0), 0x01)
+                mstore(add(call_, 0x0100), mload(m_))
+                mstore(add(call_, 0x0120), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0140, r_, 0x40))
             }
+        }
+    }
 
-            return handler_;
+    function _moddiv(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) internal view {
+        unchecked {
+            uint512 buffer_ = _buffer(call_);
+
+            _modinv(call_, b_, m_, buffer_);
+            _modmulOverflow(call_, a_, b_);
+
+            assembly {
+                mstore(call_, 0x60)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xE0, call_), 0x01)
+                mstore(add(0x0100, call_), mload(m_))
+                mstore(add(0x0120, call_), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0140, r_, 0x40))
+            }
+        }
+    }
+
+    function _buffer(call call_) private pure returns (uint512 buffer_) {
+        unchecked {
+            assembly {
+                buffer_ := add(call_, 0x0140)
+            }
         }
     }
 
-    function _allocateCall(uint256 bytes_) private pure returns (uint256 handler_) {
+    function _allocate(uint256 bytes_) private pure returns (uint256 handler_) {
         unchecked {
             assembly {
                 handler_ := mload(0x40)
                 mstore(0x40, add(handler_, bytes_))
             }
-
-            return handler_;
         }
     }
 }
diff --git a/contracts/mock/libs/crypto/ECDSA384Mock.sol b/contracts/mock/libs/crypto/ECDSA384Mock.sol
index 01054f1f..0d0f606e 100644
--- a/contracts/mock/libs/crypto/ECDSA384Mock.sol
+++ b/contracts/mock/libs/crypto/ECDSA384Mock.sol
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import "../../../libs/crypto/bn/U512.sol";
+import {uint512} from "../../../libs/crypto/bn/U512.sol";
+import {U512} from "../../../libs/crypto/bn/U512.sol";
 import {ECDSA384} from "../../../libs/crypto/ECDSA384.sol";
 
 contract ECDSA384Mock {

From ba239ee3f4e4edde556661e777c2e5f204bb7c5f Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Wed, 15 Jan 2025 21:55:53 +0200
Subject: [PATCH 04/42] wip

---
 contracts/libs/crypto/ECDSA384.sol | 62 ++++++------------------
 contracts/libs/crypto/bn/U512.sol  | 77 +++++++++++++++++++-----------
 2 files changed, 62 insertions(+), 77 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 60a01e45..20b3bf82 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -70,37 +70,8 @@ library ECDSA384 {
         unchecked {
             _Inputs memory inputs_;
 
-            {
-                bytes memory lhs_ = new bytes(64);
-                bytes memory rhs_ = new bytes(64);
-
-                MemoryUtils.unsafeCopy(
-                    signature_.getDataPointer(),
-                    lhs_.getDataPointer() + 0x10,
-                    48
-                );
-                MemoryUtils.unsafeCopy(
-                    signature_.getDataPointer() + 0x30,
-                    rhs_.getDataPointer() + 0x10,
-                    48
-                );
-
-                (inputs_.r, inputs_.s) = (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
-
-                MemoryUtils.unsafeCopy(pubKey_.getDataPointer(), lhs_.getDataPointer() + 0x10, 48);
-                MemoryUtils.unsafeCopy(
-                    pubKey_.getDataPointer() + 0x30,
-                    rhs_.getDataPointer() + 0x10,
-                    48
-                );
-
-                (inputs_.x, inputs_.y) = (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
-            }
-
-            console.logBytes(U512.toBytes(inputs_.x));
-            console.logBytes(U512.toBytes(inputs_.y));
-            console.logBytes(U512.toBytes(inputs_.r));
-            console.logBytes(U512.toBytes(inputs_.s));
+            (inputs_.r, inputs_.s) = _u512FromBytes2(signature_);
+            (inputs_.x, inputs_.y) = _u512FromBytes2(pubKey_);
 
             _Parameters memory params_ = _Parameters({
                 a: U512.fromBytes(curveParams_.a),
@@ -128,23 +99,6 @@ library ECDSA384 {
                 return false;
             }
 
-            /// allow compatibility with non-384-bit hash functions.
-            {
-                uint256 hashedMessageLength_ = hashedMessage_.length;
-
-                if (hashedMessageLength_ < 64) {
-                    bytes memory tmp_ = new bytes(64);
-
-                    MemoryUtils.unsafeCopy(
-                        hashedMessage_.getDataPointer(),
-                        tmp_.getDataPointer() + 64 - hashedMessageLength_,
-                        hashedMessageLength_
-                    );
-
-                    hashedMessage_ = tmp_;
-                }
-            }
-
             uint512 scalar1_ = U512.moddiv(
                 call,
                 U512.fromBytes(hashedMessage_),
@@ -185,6 +139,18 @@ library ECDSA384 {
         }
     }
 
+    function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
+        unchecked {
+            bytes memory lhs_ = new bytes(48);
+            bytes memory rhs_ = new bytes(48);
+
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 32, rhs_.getDataPointer(), 48);
+
+            return (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
+        }
+    }
+
     /**
      * @dev Check if a point in affine coordinates is on the curve.
      */
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 1e7ca702..1fe2a370 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -29,15 +29,27 @@ library U512 {
         }
     }
 
-    function fromBytes(bytes memory bytes_) internal pure returns (uint512 u512_) {
+    function fromBytes(bytes memory bytes_) internal view returns (uint512 u512_) {
         unchecked {
-            require(bytes_.length == 64, "U384: not 64 bytes");
+            require(bytes_.length <= 64, "U384: >64 bytes");
 
             u512_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
             assembly {
-                mstore(u512_, mload(add(bytes_, 0x20)))
-                mstore(add(u512_, 0x20), mload(add(bytes_, 0x40)))
+                mstore(u512_, 0)
+                mstore(add(u512_, 0x20), 0)
+
+                let size_ := mload(bytes_)
+                pop(
+                    staticcall(
+                        gas(),
+                        0x4,
+                        add(bytes_, 0x20),
+                        size_,
+                        add(u512_, sub(0x40, size_)),
+                        size_
+                    )
+                )
             }
         }
     }
@@ -609,28 +621,35 @@ library U512 {
     function _modmulOverflow(call call_, uint512 a_, uint512 b_) private pure {
         unchecked {
             assembly {
-                let a0_ := shr(128, mload(a_))
-                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
-                let a2_ := shr(128, mload(add(a_, 0x20)))
-                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
-                let b0_ := shr(128, mload(b_))
-                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
-                let b2_ := shr(128, mload(add(b_, 0x20)))
-                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+                function __mul(_a, _b, _idx1, _idx2) -> _word {
+                    _word := mul(
+                        shr(128, mload(add(_a, mul(_idx1, 0x10)))),
+                        shr(128, mload(add(_b, mul(_idx2, 0x10))))
+                    )
+                }
+
+                //                let a0_ := shr(128, mload(a_))
+                //                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
+                //                let a2_ := shr(128, mload(add(a_, 0x20)))
+                //                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+                //
+                //                let b0_ := shr(128, mload(b_))
+                //                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
+                //                let b2_ := shr(128, mload(add(b_, 0x20)))
+                //                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
 
                 // r7
-                let current_ := mul(a3_, b3_)
+                let current_ := __mul(a_, b_, 3, 3)
                 let ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
                 // r6
                 current_ := shr(128, current_)
 
-                let temp_ := mul(a3_, b2_)
+                let temp_ := __mul(a_, b_, 3, 2)
                 current_ := add(current_, temp_)
                 let curry_ := lt(current_, temp_)
 
-                temp_ := mul(a2_, b3_)
+                temp_ := __mul(a_, b_, 2, 3)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -640,15 +659,15 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := mul(a3_, b1_)
+                temp_ := __mul(a_, b_, 3, 1)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := mul(a2_, b2_)
+                temp_ := __mul(a_, b_, 2, 2)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := mul(a1_, b3_)
+                temp_ := __mul(a_, b_, 1, 3)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -658,19 +677,19 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := mul(a3_, b0_)
+                temp_ := __mul(a_, b_, 3, 0)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := mul(a2_, b1_)
+                temp_ := __mul(a_, b_, 2, 1)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := mul(a1_, b2_)
+                temp_ := __mul(a_, b_, 1, 2)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := mul(a0_, b2_)
+                temp_ := __mul(a_, b_, 0, 3)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -680,15 +699,15 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := mul(a2_, b0_)
+                temp_ := __mul(a_, b_, 2, 0)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := mul(a1_, b1_)
+                temp_ := __mul(a_, b_, 1, 1)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := mul(a0_, b2_)
+                temp_ := __mul(a_, b_, 0, 2)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -698,11 +717,11 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := mul(a1_, b0_)
+                temp_ := __mul(a_, b_, 1, 0)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := mul(a0_, b1_)
+                temp_ := __mul(a_, b_, 0, 1)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -710,7 +729,7 @@ library U512 {
 
                 // r1
                 current_ := add(shl(128, curry_), shr(128, current_))
-                current_ := add(current_, mul(a0_, b0_))
+                current_ := add(current_, __mul(a_, b_, 0, 0))
 
                 mstore(add(call_, 0x60), current_)
             }

From 372081459104a1bb030bd66e125571f9223c51e2 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Thu, 16 Jan 2025 22:26:53 +0200
Subject: [PATCH 05/42] fixed add & added test

---
 contracts/libs/crypto/bn/U512.sol          |  6 +-
 contracts/mock/libs/crypto/bn/U512Mock.sol | 79 ++++++++++++++++++++++
 test/libs/crypto/ECDSA384.test.ts          |  2 +-
 test/libs/crypto/bn/U512.test.ts           | 37 ++++++++++
 4 files changed, 120 insertions(+), 4 deletions(-)
 create mode 100644 contracts/mock/libs/crypto/bn/U512Mock.sol
 create mode 100644 test/libs/crypto/bn/U512.test.ts

diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 1fe2a370..4418551b 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -467,7 +467,7 @@ library U512 {
                 mstore(r_, sum_)
 
                 sum_ := gt(aWord_, sum_)
-                sum_ := add(sum_, add(aWord_, mload(b_)))
+                sum_ := add(sum_, add(mload(a_), mload(b_)))
 
                 mstore(add(r_, 0x20), sum_)
             }
@@ -483,12 +483,12 @@ library U512 {
                 mstore(add(call_, 0xA0), sum_)
 
                 sum_ := gt(aWord_, sum_)
-                sum_ := add(sum_, add(aWord_, mload(b_)))
+                sum_ := add(sum_, add(mload(a_), mload(b_)))
 
                 mstore(add(call_, 0x80), sum_)
                 mstore(add(call_, 0x60), gt(mload(a_), sum_))
 
-                mstore(call_, 0x40)
+                mstore(call_, 0x60)
                 mstore(add(call_, 0x20), 0x20)
                 mstore(add(call_, 0x40), 0x40)
                 mstore(add(call_, 0xC0), 0x01)
diff --git a/contracts/mock/libs/crypto/bn/U512Mock.sol b/contracts/mock/libs/crypto/bn/U512Mock.sol
new file mode 100644
index 00000000..b2951ce3
--- /dev/null
+++ b/contracts/mock/libs/crypto/bn/U512Mock.sol
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+import {call} from "../../../../libs/crypto/bn/U512.sol";
+import {uint512} from "../../../../libs/crypto/bn/U512.sol";
+import {U512} from "../../../../libs/crypto/bn/U512.sol";
+
+contract U512Mock {
+    using U512 for *;
+
+    function modadd(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return modaddGas(call_, a_, b_, m_).toBytes();
+    }
+
+    function modsub(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return modsubGas(call_, a_, b_, m_).toBytes();
+    }
+
+    function modmul(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return modmulGas(call_, a_, b_, m_).toBytes();
+    }
+
+    function modaddGas(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) public view returns (uint512) {
+        return U512.modadd(call_, a_, b_, m_);
+    }
+
+    function modsubGas(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) public view returns (uint512) {
+        return U512.modsub(call_, a_, b_, m_);
+    }
+
+    function modmulGas(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) public view returns (uint512) {
+        return U512.modmul(call_, a_, b_, m_);
+    }
+}
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index 9294b951..de29aaac 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -168,7 +168,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe.only("brainpoolP384r1", () => {
+  describe("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =
diff --git a/test/libs/crypto/bn/U512.test.ts b/test/libs/crypto/bn/U512.test.ts
new file mode 100644
index 00000000..994a4128
--- /dev/null
+++ b/test/libs/crypto/bn/U512.test.ts
@@ -0,0 +1,37 @@
+import { ethers } from "hardhat";
+import { expect } from "chai";
+import { Reverter } from "@/test/helpers/reverter";
+
+import { U512Mock } from "@ethers-v6";
+
+describe("U512", () => {
+  const reverter = new Reverter();
+
+  let u512: U512Mock;
+
+  function randomU512(): string {
+    return "0x" + ethers.toBigInt(ethers.randomBytes(64)).toString(16);
+  }
+
+  function modadd(a: string, b: string, m: string): string {
+    return "0x" + ((ethers.toBigInt(a) + ethers.toBigInt(b)) % ethers.toBigInt(m)).toString(16).padStart(128, "0");
+  }
+
+  before(async () => {
+    const U512Mock = await ethers.getContractFactory("U512Mock");
+
+    u512 = await U512Mock.deploy();
+
+    await reverter.snapshot();
+  });
+
+  afterEach(reverter.revert);
+
+  it.only("modadd test", async () => {
+    const a = randomU512();
+    const b = randomU512();
+    const m = randomU512();
+
+    expect(await u512.modadd(a, b, m)).to.equal(modadd(a, b, m));
+  });
+});

From a8a79a5686026593400250e0c2ffc6cc9aaffa66 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Thu, 16 Jan 2025 23:25:34 +0200
Subject: [PATCH 06/42] wip (passed ecdsa check) 17.8kk

---
 contracts/libs/crypto/ECDSA384.sol         | 31 +++++----
 contracts/libs/crypto/bn/U512.sol          | 58 +++++++---------
 contracts/mock/libs/crypto/bn/U512Mock.sol | 51 +++++++-------
 test/libs/crypto/ECDSA384.test.ts          |  2 +-
 test/libs/crypto/bn/U512.test.ts           | 78 +++++++++++++++++++---
 5 files changed, 138 insertions(+), 82 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 20b3bf82..5156504b 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -5,8 +5,6 @@ import {call, uint512} from "./bn/U512.sol";
 import {U512} from "./bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
 
-import "hardhat/console.sol";
-
 /**
  * @notice Cryptography module
  *
@@ -85,6 +83,8 @@ library ECDSA384 {
 
             call call = U512.initCall();
 
+            console.log("passed0");
+
             /// accept s only from the lower part of the curve
             if (
                 U512.eqUint256(inputs_.r, 0) ||
@@ -139,18 +139,6 @@ library ECDSA384 {
         }
     }
 
-    function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
-        unchecked {
-            bytes memory lhs_ = new bytes(48);
-            bytes memory rhs_ = new bytes(48);
-
-            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
-            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 32, rhs_.getDataPointer(), 48);
-
-            return (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
-        }
-    }
-
     /**
      * @dev Check if a point in affine coordinates is on the curve.
      */
@@ -390,4 +378,19 @@ library ECDSA384 {
             return points_;
         }
     }
+
+    /**
+     * @dev Convert 96 bytes to two 512-bit unsigned integers (48 bytes each); the length of `bytes_` is not checked here.
+     */
+    function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
+        unchecked {
+            bytes memory lhs_ = new bytes(48); // buffer for the first 48-byte half
+            bytes memory rhs_ = new bytes(48); // buffer for the second 48-byte half
+
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48); // second half starts 48 bytes past the data pointer
+
+            return (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
+        }
+    }
 }
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 4418551b..52364b19 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -424,7 +424,7 @@ library U512 {
                 mstore(add(call_, 0xA0), mload(e_))
                 mstore(add(call_, 0xC0), mload(add(e_, 0x20)))
                 mstore(add(call_, 0xE0), mload(m_))
-                mstore(add(call_, 0x01000), mload(add(m_, 0x20)))
+                mstore(add(call_, 0x0100), mload(add(m_, 0x20)))
 
                 pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
             }
@@ -621,45 +621,35 @@ library U512 {
     function _modmulOverflow(call call_, uint512 a_, uint512 b_) private pure {
         unchecked {
             assembly {
-                function __mul(_a, _b, _idx1, _idx2) -> _word {
-                    _word := mul(
-                        shr(128, mload(add(_a, mul(_idx1, 0x10)))),
-                        shr(128, mload(add(_b, mul(_idx2, 0x10))))
+                function __mul(a, b, idx1, idx2) -> word {
+                    word := mul(
+                        shr(128, mload(add(a, mul(idx1, 0x10)))),
+                        shr(128, mload(add(b, mul(idx2, 0x10))))
                     )
                 }
 
-                //                let a0_ := shr(128, mload(a_))
-                //                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
-                //                let a2_ := shr(128, mload(add(a_, 0x20)))
-                //                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-                //
-                //                let b0_ := shr(128, mload(b_))
-                //                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
-                //                let b2_ := shr(128, mload(add(b_, 0x20)))
-                //                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
                 // r7
                 let current_ := __mul(a_, b_, 3, 3)
-                let ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+                let r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
                 // r6
                 current_ := shr(128, current_)
 
-                let temp_ := __mul(a_, b_, 3, 2)
+                let temp_ := __mul(a_, b_, 2, 3)
                 current_ := add(current_, temp_)
                 let curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 2, 3)
+                temp_ := __mul(a_, b_, 3, 2)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                mstore(add(call_, 0xC0), add(shl(128, current_), ri_))
+                mstore(add(call_, 0xC0), add(shl(128, current_), r0_))
 
                 // r5
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 3, 1)
+                temp_ := __mul(a_, b_, 1, 3)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
@@ -667,33 +657,33 @@ library U512 {
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 1, 3)
+                temp_ := __mul(a_, b_, 3, 1)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+                r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
                 // r4
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 3, 0)
+                temp_ := __mul(a_, b_, 0, 3)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 2, 1)
+                temp_ := __mul(a_, b_, 1, 2)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 1, 2)
+                temp_ := __mul(a_, b_, 2, 1)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 0, 3)
+                temp_ := __mul(a_, b_, 3, 0)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                mstore(add(call_, 0xA0), add(shl(128, current_), ri_))
+                mstore(add(call_, 0xA0), add(shl(128, current_), r0_))
 
                 // r3
                 current_ := add(shl(128, curry_), shr(128, current_))
@@ -711,21 +701,21 @@ library U512 {
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
+                r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
                 // r2
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 1, 0)
+                temp_ := __mul(a_, b_, 0, 1)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 0, 1)
+                temp_ := __mul(a_, b_, 1, 0)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                mstore(add(call_, 0x80), add(shl(128, current_), ri_))
+                mstore(add(call_, 0x80), add(shl(128, current_), r0_))
 
                 // r1
                 current_ := add(shl(128, curry_), shr(128, current_))
@@ -741,7 +731,7 @@ library U512 {
             _modmulOverflow(call_, a_, b_);
 
             assembly {
-                mstore(call_, 0x40)
+                mstore(call_, 0x80)
                 mstore(add(call_, 0x20), 0x20)
                 mstore(add(call_, 0x40), 0x40)
                 mstore(add(call_, 0xE0), 0x01)
@@ -758,10 +748,10 @@ library U512 {
             uint512 buffer_ = _buffer(call_);
 
             _modinv(call_, b_, m_, buffer_);
-            _modmulOverflow(call_, a_, b_);
+            _modmulOverflow(call_, a_, buffer_);
 
             assembly {
-                mstore(call_, 0x60)
+                mstore(call_, 0x80)
                 mstore(add(0x20, call_), 0x20)
                 mstore(add(0x40, call_), 0x40)
                 mstore(add(0xE0, call_), 0x01)
diff --git a/contracts/mock/libs/crypto/bn/U512Mock.sol b/contracts/mock/libs/crypto/bn/U512Mock.sol
index b2951ce3..b94f6564 100644
--- a/contracts/mock/libs/crypto/bn/U512Mock.sol
+++ b/contracts/mock/libs/crypto/bn/U512Mock.sol
@@ -19,7 +19,7 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return modaddGas(call_, a_, b_, m_).toBytes();
+        return U512.modadd(call_, a_, b_, m_).toBytes();
     }
 
     function modsub(
@@ -33,7 +33,7 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return modsubGas(call_, a_, b_, m_).toBytes();
+        return U512.modsub(call_, a_, b_, m_).toBytes();
     }
 
     function modmul(
@@ -47,33 +47,34 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return modmulGas(call_, a_, b_, m_).toBytes();
+        return U512.modmul(call_, a_, b_, m_).toBytes();
     }
 
-    function modaddGas(
-        call call_,
-        uint512 a_,
-        uint512 b_,
-        uint512 m_
-    ) public view returns (uint512) {
-        return U512.modadd(call_, a_, b_, m_);
-    }
+    function modexp(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
 
-    function modsubGas(
-        call call_,
-        uint512 a_,
-        uint512 b_,
-        uint512 m_
-    ) public view returns (uint512) {
-        return U512.modsub(call_, a_, b_, m_);
+        return U512.modexp(call_, a_, b_, m_).toBytes();
     }
 
-    function modmulGas(
-        call call_,
-        uint512 a_,
-        uint512 b_,
-        uint512 m_
-    ) public view returns (uint512) {
-        return U512.modmul(call_, a_, b_, m_);
+    function moddiv(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.moddiv(call_, a_, b_, m_).toBytes();
     }
 }
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index de29aaac..9294b951 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -168,7 +168,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe("brainpoolP384r1", () => {
+  describe.only("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =
diff --git a/test/libs/crypto/bn/U512.test.ts b/test/libs/crypto/bn/U512.test.ts
index 994a4128..fb4e1d1e 100644
--- a/test/libs/crypto/bn/U512.test.ts
+++ b/test/libs/crypto/bn/U512.test.ts
@@ -4,17 +4,37 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe("U512", () => {
+describe.only("U512", () => {
   const reverter = new Reverter();
 
   let u512: U512Mock;
 
   function randomU512(): string {
-    return "0x" + ethers.toBigInt(ethers.randomBytes(64)).toString(16);
+    return "0x" + ethers.toBigInt(ethers.randomBytes(64)).toString(16).padStart(128, "0");
+  }
+
+  function toBytes(value: bigint): string {
+    return "0x" + value.toString(16).padStart(128, "0");
   }
 
   function modadd(a: string, b: string, m: string): string {
-    return "0x" + ((ethers.toBigInt(a) + ethers.toBigInt(b)) % ethers.toBigInt(m)).toString(16).padStart(128, "0");
+    return toBytes((ethers.toBigInt(a) + ethers.toBigInt(b)) % ethers.toBigInt(m));
+  }
+
+  function modmul(a: string, b: string, m: string): string {
+    return toBytes((ethers.toBigInt(a) * ethers.toBigInt(b)) % ethers.toBigInt(m));
+  }
+
+  function modexp(a: string, b: string, m: string): string {
+    return toBytes(ethers.toBigInt(a) ** ethers.toBigInt(b) % ethers.toBigInt(m)); // `**` binds tighter than `%`: the full power is built before reducing, so keep b small
+  }
+
+  function modsub(a: string, b: string, m: string): string {
+    const aBn = ethers.toBigInt(a);
+    const bBn = ethers.toBigInt(b);
+    const mBn = ethers.toBigInt(m);
+
+    return toBytes((((aBn - bBn) % mBn) + mBn) % mBn); // BigInt `%` keeps the dividend's sign, so add mBn and reduce again to land in [0, m)
+  }
 
   before(async () => {
@@ -27,11 +47,53 @@ describe("U512", () => {
 
   afterEach(reverter.revert);
 
-  it.only("modadd test", async () => {
-    const a = randomU512();
-    const b = randomU512();
-    const m = randomU512();
+  it("modadd test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const m = randomU512();
+
+      expect(await u512.modadd(a, b, m)).to.equal(modadd(a, b, m));
+    }
+  });
+
+  it("modmul test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const m = randomU512();
+
+      expect(await u512.modmul(a, b, m)).to.equal(modmul(a, b, m));
+    }
+  });
+
+  it("modsub test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const m = randomU512();
+
+      expect(await u512.modsub(a, b, m)).to.equal(modsub(a, b, m));
+    }
+  });
+
+  it("modexp test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = toBytes(100n); // fixed small exponent: the local modexp() reference computes a ** b in full
+      const m = randomU512();
+
+      expect(await u512.modexp(a, b, m)).to.equal(modexp(a, b, m));
+    }
+  });
+
+  it("moddiv test", async () => { // single hard-coded vector; this file has no reference helper for modular division
+    const a = toBytes(779149564533142355434093157610126726613246737199n);
+    const b = toBytes(29118654464229156312755475164902924590603964377702716942232927993582928167089n);
+    const m = toBytes(76884956397045344220809746629001649092737531784414529538755519063063536359079n);
+
+    const expected = toBytes(30823410400962253491978005949535646087432096635784775122170630924100507445065n); // precomputed outside this test
 
-    expect(await u512.modadd(a, b, m)).to.equal(modadd(a, b, m));
+    expect(await u512.moddiv(a, b, m)).to.equal(expected);
   });
 });

From c698a36ed2ca5dd4921ddb087cc33fdb8f0c1fed Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Thu, 16 Jan 2025 23:25:55 +0200
Subject: [PATCH 07/42] fix

---
 contracts/libs/crypto/ECDSA384.sol | 2 --
 1 file changed, 2 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 5156504b..f3ce9772 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -83,8 +83,6 @@ library ECDSA384 {
 
             call call = U512.initCall();
 
-            console.log("passed0");
-
             /// accept s only from the lower part of the curve
             if (
                 U512.eqUint256(inputs_.r, 0) ||

From 85a2c36193346a51b7228887f0728add1b561ba3 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Fri, 17 Jan 2025 08:40:45 +0200
Subject: [PATCH 08/42] opt

---
 contracts/libs/crypto/ECDSA384.sol | 63 +++++++++++++++---------------
 contracts/libs/crypto/bn/U512.sol  | 21 ++++++++++
 2 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index f3ce9772..63e1f1c7 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -81,7 +81,7 @@ library ECDSA384 {
                 lowSmax: U512.fromBytes(curveParams_.lowSmax)
             });
 
-            call call = U512.initCall();
+            call call_ = U512.initCall();
 
             /// accept s only from the lower part of the curve
             if (
@@ -93,26 +93,25 @@ library ECDSA384 {
                 return false;
             }
 
-            if (!_isOnCurve(call, params_.p, params_.a, params_.b, inputs_.x, inputs_.y)) {
+            if (!_isOnCurve(call_, params_.p, params_.a, params_.b, inputs_.x, inputs_.y)) {
                 return false;
             }
 
             uint512 scalar1_ = U512.moddiv(
-                call,
+                call_,
                 U512.fromBytes(hashedMessage_),
                 inputs_.s,
                 params_.n
             );
-            uint512 scalar2_ = U512.moddiv(call, inputs_.r, inputs_.s, params_.n);
+            uint512 scalar2_ = U512.moddiv(call_, inputs_.r, inputs_.s, params_.n);
 
             {
-                uint512 three_ = U512.fromUint256(3);
-
                 /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
                 uint512[2][64] memory points_ = _precomputePointsTable(
-                    call,
+                    call_,
                     params_.p,
-                    three_,
+                    U512.fromUint256(2),
+                    U512.fromUint256(3),
                     params_.a,
                     params_.gx,
                     params_.gy,
@@ -121,9 +120,10 @@ library ECDSA384 {
                 );
 
                 (scalar1_, ) = _doubleScalarMultiplication(
-                    call,
+                    call_,
                     params_.p,
-                    three_,
+                    U512.fromUint256(2),
+                    U512.fromUint256(3),
                     params_.a,
                     points_,
                     scalar1_,
@@ -131,7 +131,7 @@ library ECDSA384 {
                 );
             }
 
-            U512.modAssign(call, scalar1_, params_.n);
+            U512.modAssign(call_, scalar1_, params_.n);
 
             return U512.eq(scalar1_, inputs_.r);
         }
@@ -179,6 +179,7 @@ library ECDSA384 {
     function _doubleScalarMultiplication(
         call call_,
         uint512 p_,
+        uint512 two_,
         uint512 three_,
         uint512 a_,
         uint512[2][64] memory points_,
@@ -197,12 +198,13 @@ library ECDSA384 {
                 mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
-                    (x_, y_) = _twiceAffine(call_, p_, three_, a_, x_, y_);
-                    (x_, y_) = _twiceAffine(call_, p_, three_, a_, x_, y_);
-                    (x_, y_) = _twiceAffine(call_, p_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
                     (x_, y_) = _addAffine(
                         call_,
                         p_,
+                        two_,
                         three_,
                         a_,
                         points_[mask_][0],
@@ -242,6 +244,7 @@ library ECDSA384 {
     function _twiceAffine(
         call call_,
         uint512 p_,
+        uint512 two_,
         uint512 three_,
         uint512 a_,
         uint512 x1_,
@@ -256,14 +259,14 @@ library ECDSA384 {
                 return (x2_, y2_);
             }
 
-            uint512 m1_ = U512.modexp(call_, x1_, U512.fromUint256(2), p_);
+            uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
             U512.modmulAssign(call_, m1_, three_, p_);
             U512.modaddAssign(call_, m1_, a_, p_);
 
-            uint512 m2_ = U512.modmul(call_, y1_, U512.fromUint256(2), p_);
+            uint512 m2_ = U512.shl(call_, y1_, p_);
             U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x2_ = U512.modexp(call_, m1_, U512.fromUint256(2), p_);
+            x2_ = U512.modexp(call_, m1_, two_, p_);
             U512.modsubAssign(call_, x2_, x1_, p_);
             U512.modsubAssign(call_, x2_, x1_, p_);
 
@@ -279,6 +282,7 @@ library ECDSA384 {
     function _addAffine(
         call call_,
         uint512 p_,
+        uint512 two_,
         uint512 three_,
         uint512 a_,
         uint512 x1_,
@@ -300,7 +304,7 @@ library ECDSA384 {
 
             if (U512.eq(x1_, x2_)) {
                 if (U512.eq(y1_, y2_)) {
-                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
+                    return _twiceAffine(call_, p_, two_, three_, a_, x1_, y1_);
                 }
 
                 return (x3, y3);
@@ -311,7 +315,7 @@ library ECDSA384 {
 
             U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x3 = U512.modexp(call_, m1_, U512.fromUint256(2), p_);
+            x3 = U512.modexp(call_, m1_, two_, p_);
             U512.modsubAssign(call_, x3, x1_, p_);
             U512.modsubAssign(call_, x3, x2_, p_);
 
@@ -324,6 +328,7 @@ library ECDSA384 {
     function _precomputePointsTable(
         call call_,
         uint512 p_,
+        uint512 two_,
         uint512 three_,
         uint512 a_,
         uint512 gx_,
@@ -341,31 +346,27 @@ library ECDSA384 {
                         continue;
                     }
 
-                    uint256 maskTo = (i << 3) | j;
-
                     if (i != 0) {
-                        uint256 maskFrom_ = ((i - 1) << 3) | j;
-
-                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
+                        (points_[(i << 3) | j][0], points_[(i << 3) | j][1]) = _addAffine(
                             call_,
                             p_,
+                            two_,
                             three_,
                             a_,
-                            points_[maskFrom_][0],
-                            points_[maskFrom_][1],
+                            points_[((i - 1) << 3) | j][0],
+                            points_[((i - 1) << 3) | j][1],
                             gx_,
                             gy_
                         );
                     } else {
-                        uint256 maskFrom_ = (i << 3) | (j - 1);
-
-                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
+                        (points_[(i << 3) | j][0], points_[(i << 3) | j][1]) = _addAffine(
                             call_,
                             p_,
+                            two_,
                             three_,
                             a_,
-                            points_[maskFrom_][0],
-                            points_[maskFrom_][1],
+                            points_[(i << 3) | (j - 1)][0],
+                            points_[(i << 3) | (j - 1)][1],
                             hx_,
                             hy_
                         );
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 52364b19..42fcef9b 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -396,6 +396,27 @@ library U512 {
         }
     }
 
+    function shl(call call_, uint512 a_, uint512 p_) internal view returns (uint512 r_) { // one-bit left shift of a_, then _mod by p_; result goes to a separate r_
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION)); // result slot; presumably fresh memory from _allocate - TODO confirm
+
+            _shl(call_, a_, p_, r_);
+        }
+    }
+
+    function _shl(call call_, uint512 a_, uint512 p_, uint512 r_) private view { // stores (a_ << 1) into r_, then runs _mod(r_, p_) back into r_
+        unchecked {
+            assembly {
+                let a1_ := mload(add(a_, 0x20)) // second 32-byte word of a_
+
+                mstore(r_, or(shl(1, mload(a_)), shr(255, a1_))) // first word shifted, with the top bit of a1_ carried into its lowest bit
+                mstore(add(r_, 0x20), shl(1, a1_)) // second word shifted; its top bit was carried into the first word above
+            }
+
+            _mod(call_, r_, p_, r_); // NOTE(review): the top bit of the first word is discarded by shl(1, ...), so this is 2*a_ mod p_ only if a_ < 2^511 - confirm callers
+        }
+    }
+
     function _mod(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {

From de6382b80e9bb096c09f67cb9d55ded2892699c1 Mon Sep 17 00:00:00 2001
From: Artem Chystiakov <artem.ch31@gmail.com>
Date: Fri, 17 Jan 2025 14:10:37 +0200
Subject: [PATCH 09/42] -500k

---
 contracts/libs/crypto/ECDSA384.sol | 24 +++++++------
 contracts/libs/crypto/bn/U512.sol  | 57 ++++++++++++++++--------------
 2 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 63e1f1c7..167db0e5 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -113,10 +113,10 @@ library ECDSA384 {
                     U512.fromUint256(2),
                     U512.fromUint256(3),
                     params_.a,
-                    params_.gx,
-                    params_.gy,
                     inputs_.x,
-                    inputs_.y
+                    inputs_.y,
+                    params_.gx,
+                    params_.gy
                 );
 
                 (scalar1_, ) = _doubleScalarMultiplication(
@@ -331,10 +331,10 @@ library ECDSA384 {
         uint512 two_,
         uint512 three_,
         uint512 a_,
-        uint512 gx_,
-        uint512 gy_,
         uint512 hx_,
-        uint512 hy_
+        uint512 hy_,
+        uint512 gx_,
+        uint512 gy_
     ) private view returns (uint512[2][64] memory points_) {
         unchecked {
             (points_[0x01][0], points_[0x01][1]) = (U512.copy(hx_), U512.copy(hy_));
@@ -346,20 +346,24 @@ library ECDSA384 {
                         continue;
                     }
 
+                    uint256 maskTo = (i << 3) | j;
+
                     if (i != 0) {
-                        (points_[(i << 3) | j][0], points_[(i << 3) | j][1]) = _addAffine(
+                        uint256 maskFrom = ((i - 1) << 3) | j;
+
+                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
                             call_,
                             p_,
                             two_,
                             three_,
                             a_,
-                            points_[((i - 1) << 3) | j][0],
-                            points_[((i - 1) << 3) | j][1],
+                            points_[maskFrom][0],
+                            points_[maskFrom][1],
                             gx_,
                             gy_
                         );
                     } else {
-                        (points_[(i << 3) | j][0], points_[(i << 3) | j][1]) = _addAffine(
+                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
                             call_,
                             p_,
                             two_,
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 42fcef9b..5866510d 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -31,7 +31,7 @@ library U512 {
 
     function fromBytes(bytes memory bytes_) internal view returns (uint512 u512_) {
         unchecked {
-            require(bytes_.length <= 64, "U384: >64 bytes");
+            assert(bytes_.length < 65);
 
             u512_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
@@ -639,28 +639,33 @@ library U512 {
         }
     }
 
-    function _modmulOverflow(call call_, uint512 a_, uint512 b_) private pure {
+    function _modmulOverflow(uint512 a_, uint512 b_, call call_) private pure {
         unchecked {
             assembly {
-                function __mul(a, b, idx1, idx2) -> word {
-                    word := mul(
-                        shr(128, mload(add(a, mul(idx1, 0x10)))),
-                        shr(128, mload(add(b, mul(idx2, 0x10))))
-                    )
-                }
+                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
+
+                let a2_ := shr(128, mload(add(a_, 0x20)))
+                let b2_ := shr(128, mload(add(b_, 0x20)))
+
+                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
+                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
+
+                let a0_ := shr(128, mload(a_))
+                let b0_ := shr(128, mload(b_))
 
                 // r7
-                let current_ := __mul(a_, b_, 3, 3)
+                let current_ := mul(a3_, b3_)
                 let r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
 
                 // r6
                 current_ := shr(128, current_)
 
-                let temp_ := __mul(a_, b_, 2, 3)
+                let temp_ := mul(a2_, b3_)
                 current_ := add(current_, temp_)
                 let curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 3, 2)
+                temp_ := mul(a3_, b2_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -670,15 +675,15 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 1, 3)
+                temp_ := mul(a1_, b3_)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 2, 2)
+                temp_ := mul(a2_, b2_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 3, 1)
+                temp_ := mul(a3_, b1_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -688,19 +693,19 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 0, 3)
+                temp_ := mul(a0_, b3_)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 1, 2)
+                temp_ := mul(a1_, b2_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 2, 1)
+                temp_ := mul(a2_, b1_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 3, 0)
+                temp_ := mul(a3_, b0_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -710,15 +715,15 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 2, 0)
+                temp_ := mul(a2_, b0_)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 1, 1)
+                temp_ := mul(a1_, b1_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
-                temp_ := __mul(a_, b_, 0, 2)
+                temp_ := mul(a0_, b2_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -728,11 +733,11 @@ library U512 {
                 current_ := add(shl(128, curry_), shr(128, current_))
                 curry_ := 0
 
-                temp_ := __mul(a_, b_, 0, 1)
+                temp_ := mul(a0_, b1_)
                 current_ := add(current_, temp_)
                 curry_ := lt(current_, temp_)
 
-                temp_ := __mul(a_, b_, 1, 0)
+                temp_ := mul(a1_, b0_)
                 current_ := add(current_, temp_)
                 curry_ := add(curry_, lt(current_, temp_))
 
@@ -740,7 +745,7 @@ library U512 {
 
                 // r1
                 current_ := add(shl(128, curry_), shr(128, current_))
-                current_ := add(current_, __mul(a_, b_, 0, 0))
+                current_ := add(current_, mul(a0_, b0_))
 
                 mstore(add(call_, 0x60), current_)
             }
@@ -749,7 +754,7 @@ library U512 {
 
     function _modmul(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
-            _modmulOverflow(call_, a_, b_);
+            _modmulOverflow(a_, b_, call_);
 
             assembly {
                 mstore(call_, 0x80)
@@ -769,7 +774,7 @@ library U512 {
             uint512 buffer_ = _buffer(call_);
 
             _modinv(call_, b_, m_, buffer_);
-            _modmulOverflow(call_, a_, buffer_);
+            _modmulOverflow(a_, buffer_, call_);
 
             assembly {
                 mstore(call_, 0x80)

From 31a36ebc4848c1f42a255293245706b24cbab1a1 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Fri, 17 Jan 2025 15:59:21 +0200
Subject: [PATCH 10/42] 15.8kk 2p

---
 contracts/libs/crypto/bn/U512.sol | 50 +++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 5866510d..29564b73 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -752,9 +752,55 @@ library U512 {
         }
     }
 
+    function _modmul2p(call call_, uint512 a_, uint512 b_) private pure {
+        unchecked {
+            assembly {
+                let a0_ := mload(a_)
+                let a1_ := mload(add(a_, 0x20))
+                let b0_ := mload(b_)
+                let b1_ := mload(add(b_, 0x20))
+
+                function mul2p(a, b) -> prod0, prod1 {
+                    let mm := mulmod(a, b, not(0))
+                    prod1 := mul(a, b)
+                    prod0 := sub(sub(mm, prod1), lt(mm, prod1))
+                }
+
+                let c0_ := 0
+                let c1_ := 0
+                let c2_ := 0
+                let c3_ := 0
+
+                c2_, c3_ := mul2p(a1_, b1_)
+
+                let prod0_, prod1_ := mul2p(a0_, b1_)
+                c2_ := add(c2_, prod1_)
+                c1_ := lt(c2_, prod1_)
+                c1_ := add(c1_, prod0_)
+                c0_ := lt(c1_, prod0_)
+
+                prod0_, prod1_ := mul2p(a1_, b0_)
+                c2_ := add(c2_, prod1_)
+                c1_ := add(c1_, lt(c2_, prod1_))
+                c1_ := add(c1_, prod0_)
+                c0_ := add(c0_, lt(c1_, prod0_))
+
+                prod0_, prod1_ := mul2p(a0_, b0_)
+                c1_ := add(c1_, prod1_)
+                c0_ := add(c0_, lt(c1_, prod1_))
+                c0_ := add(c0_, prod0_)
+
+                mstore(add(call_, 0xC0), c3_)
+                mstore(add(call_, 0xA0), c2_)
+                mstore(add(call_, 0x80), c1_)
+                mstore(add(call_, 0x60), c0_)
+            }
+        }
+    }
+
     function _modmul(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
-            _modmulOverflow(a_, b_, call_);
+            _modmul2p(call_, a_, b_);
 
             assembly {
                 mstore(call_, 0x80)
@@ -774,7 +820,7 @@ library U512 {
             uint512 buffer_ = _buffer(call_);
 
             _modinv(call_, b_, m_, buffer_);
-            _modmulOverflow(a_, buffer_, call_);
+            _modmul2p(call_, a_, buffer_);
 
             assembly {
                 mstore(call_, 0x80)

From 4e00dc70460f9db6bf2554d40b96d1a94c2cfc70 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Fri, 17 Jan 2025 16:10:32 +0200
Subject: [PATCH 11/42] 15.3kk

---
 contracts/libs/crypto/bn/U512.sol | 48 ++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 29564b73..823330d4 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -760,32 +760,48 @@ library U512 {
                 let b0_ := mload(b_)
                 let b1_ := mload(add(b_, 0x20))
 
-                function mul2p(a, b) -> prod0, prod1 {
-                    let mm := mulmod(a, b, not(0))
-                    prod1 := mul(a, b)
-                    prod0 := sub(sub(mm, prod1), lt(mm, prod1))
-                }
-
-                let c0_ := 0
-                let c1_ := 0
-                let c2_ := 0
-                let c3_ := 0
+                let mm_ := mulmod(
+                    a1_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let c3_ := mul(a1_, b1_)
+                let c2_ := sub(sub(mm_, c3_), lt(mm_, c3_))
 
-                c2_, c3_ := mul2p(a1_, b1_)
+                mm_ := mulmod(
+                    a0_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let prod1_ := mul(a0_, b1_)
+                let prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
 
-                let prod0_, prod1_ := mul2p(a0_, b1_)
                 c2_ := add(c2_, prod1_)
-                c1_ := lt(c2_, prod1_)
+                let c1_ := lt(c2_, prod1_)
                 c1_ := add(c1_, prod0_)
-                c0_ := lt(c1_, prod0_)
+                let c0_ := lt(c1_, prod0_)
+
+                mm_ := mulmod(
+                    a1_,
+                    b0_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                prod1_ := mul(a1_, b0_)
+                prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
 
-                prod0_, prod1_ := mul2p(a1_, b0_)
                 c2_ := add(c2_, prod1_)
                 c1_ := add(c1_, lt(c2_, prod1_))
                 c1_ := add(c1_, prod0_)
                 c0_ := add(c0_, lt(c1_, prod0_))
 
-                prod0_, prod1_ := mul2p(a0_, b0_)
+                mm_ := mulmod(
+                    a0_,
+                    b0_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                prod1_ := mul(a0_, b0_)
+                prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
+
                 c1_ := add(c1_, prod1_)
                 c0_ := add(c0_, lt(c1_, prod1_))
                 c0_ := add(c0_, prod0_)

From 7facbabbfb03e97e3df429ef880f260974a2efc9 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Fri, 17 Jan 2025 16:31:30 +0200
Subject: [PATCH 12/42] rm shl 15.48kk

---
 contracts/libs/crypto/ECDSA384.sol |  2 +-
 contracts/libs/crypto/bn/U512.sol  | 21 ---------------------
 2 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 167db0e5..d2b32b30 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -263,7 +263,7 @@ library ECDSA384 {
             U512.modmulAssign(call_, m1_, three_, p_);
             U512.modaddAssign(call_, m1_, a_, p_);
 
-            uint512 m2_ = U512.shl(call_, y1_, p_);
+            uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
             U512.moddivAssign(call_, m1_, m2_, p_);
 
             x2_ = U512.modexp(call_, m1_, two_, p_);
diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/crypto/bn/U512.sol
index 823330d4..143b9101 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/crypto/bn/U512.sol
@@ -396,27 +396,6 @@ library U512 {
         }
     }
 
-    function shl(call call_, uint512 a_, uint512 p_) internal view returns (uint512 r_) {
-        unchecked {
-            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-
-            _shl(call_, a_, p_, r_);
-        }
-    }
-
-    function _shl(call call_, uint512 a_, uint512 p_, uint512 r_) private view {
-        unchecked {
-            assembly {
-                let a1_ := mload(add(a_, 0x20))
-
-                mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
-                mstore(add(r_, 0x20), shl(1, a1_))
-            }
-
-            _mod(call_, r_, p_, r_);
-        }
-    }
-
     function _mod(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {

From 32a9716050cb4d0f8f415961e135c3f1ef51cc2d Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 20 Jan 2025 16:53:22 +0200
Subject: [PATCH 13/42] wip

---
 contracts/libs/{crypto => }/bn/U512.sol     | 191 +-------
 contracts/libs/crypto/ECDSA384.sol          | 361 ++++++++------
 contracts/libs/crypto/backend/U384.sol      | 498 ++++++++++++++++++++
 contracts/mock/libs/crypto/ECDSA384Mock.sol |   9 +-
 contracts/mock/libs/crypto/bn/U512Mock.sol  |   6 +-
 5 files changed, 734 insertions(+), 331 deletions(-)
 rename contracts/libs/{crypto => }/bn/U512.sol (74%)
 create mode 100644 contracts/libs/crypto/backend/U384.sol

diff --git a/contracts/libs/crypto/bn/U512.sol b/contracts/libs/bn/U512.sol
similarity index 74%
rename from contracts/libs/crypto/bn/U512.sol
rename to contracts/libs/bn/U512.sol
index 143b9101..e44a110e 100644
--- a/contracts/libs/crypto/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -548,185 +548,24 @@ library U512 {
     function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
             assembly {
-                let a0_ := shr(128, mload(a_))
-                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
-                let a2_ := shr(128, mload(add(a_, 0x20)))
-                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
-                let b0_ := shr(128, mload(b_))
-                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
-                let b2_ := shr(128, mload(add(b_, 0x20)))
-                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
-                // r7
-                let current_ := mul(a3_, b3_)
-                let ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-                // r6
-                current_ := shr(128, current_)
-
-                let temp_ := mul(a3_, b2_)
-                current_ := add(current_, temp_)
-                let curry_ := lt(current_, temp_)
-
-                temp_ := mul(a2_, b3_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                mstore(add(r_, 0x20), add(shl(128, current_), ri_))
-
-                // r5
-                current_ := add(shl(128, curry_), shr(128, current_))
-                curry_ := 0
-
-                temp_ := mul(a3_, b1_)
-                current_ := add(current_, temp_)
-                curry_ := lt(current_, temp_)
-
-                temp_ := mul(a2_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a1_, b3_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                ri_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-                // r4
-                current_ := add(shl(128, curry_), shr(128, current_))
-                curry_ := 0
-
-                temp_ := mul(a3_, b0_)
-                current_ := add(current_, temp_)
-                curry_ := lt(current_, temp_)
-
-                temp_ := mul(a2_, b1_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a1_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a0_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                mstore(r_, add(shl(128, current_), ri_))
-            }
-        }
-    }
-
-    function _modmulOverflow(uint512 a_, uint512 b_, call call_) private pure {
-        unchecked {
-            assembly {
-                let a3_ := and(mload(add(a_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-                let b3_ := and(mload(add(b_, 0x20)), 0xffffffffffffffffffffffffffffffff)
-
-                let a2_ := shr(128, mload(add(a_, 0x20)))
-                let b2_ := shr(128, mload(add(b_, 0x20)))
-
-                let a1_ := and(mload(a_), 0xffffffffffffffffffffffffffffffff)
-                let b1_ := and(mload(b_), 0xffffffffffffffffffffffffffffffff)
-
-                let a0_ := shr(128, mload(a_))
-                let b0_ := shr(128, mload(b_))
-
-                // r7
-                let current_ := mul(a3_, b3_)
-                let r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-                // r6
-                current_ := shr(128, current_)
-
-                let temp_ := mul(a2_, b3_)
-                current_ := add(current_, temp_)
-                let curry_ := lt(current_, temp_)
-
-                temp_ := mul(a3_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                mstore(add(call_, 0xC0), add(shl(128, current_), r0_))
-
-                // r5
-                current_ := add(shl(128, curry_), shr(128, current_))
-                curry_ := 0
-
-                temp_ := mul(a1_, b3_)
-                current_ := add(current_, temp_)
-                curry_ := lt(current_, temp_)
-
-                temp_ := mul(a2_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a3_, b1_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-                // r4
-                current_ := add(shl(128, curry_), shr(128, current_))
-                curry_ := 0
-
-                temp_ := mul(a0_, b3_)
-                current_ := add(current_, temp_)
-                curry_ := lt(current_, temp_)
-
-                temp_ := mul(a1_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a2_, b1_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a3_, b0_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                mstore(add(call_, 0xA0), add(shl(128, current_), r0_))
-
-                // r3
-                current_ := add(shl(128, curry_), shr(128, current_))
-                curry_ := 0
-
-                temp_ := mul(a2_, b0_)
-                current_ := add(current_, temp_)
-                curry_ := lt(current_, temp_)
-
-                temp_ := mul(a1_, b1_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                temp_ := mul(a0_, b2_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
-
-                r0_ := and(current_, 0xffffffffffffffffffffffffffffffff)
-
-                // r2
-                current_ := add(shl(128, curry_), shr(128, current_))
-                curry_ := 0
-
-                temp_ := mul(a0_, b1_)
-                current_ := add(current_, temp_)
-                curry_ := lt(current_, temp_)
-
-                temp_ := mul(a1_, b0_)
-                current_ := add(current_, temp_)
-                curry_ := add(curry_, lt(current_, temp_))
+                let a0_ := mload(a_)
+                let a1_ := mload(add(a_, 0x20))
+                let b0_ := mload(b_)
+                let b1_ := mload(add(b_, 0x20))
 
-                mstore(add(call_, 0x80), add(shl(128, current_), r0_))
+                let mm_ := mulmod(
+                    a1_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let c3_ := mul(a1_, b1_)
+                let c2_ := sub(sub(mm_, c3_), lt(mm_, c3_))
 
-                // r1
-                current_ := add(shl(128, curry_), shr(128, current_))
-                current_ := add(current_, mul(a0_, b0_))
+                c2_ := add(c2_, mul(a0_, b1_))
+                c2_ := add(c2_, mul(a1_, b0_))
 
-                mstore(add(call_, 0x60), current_)
+                mstore(add(r_, 0x20), c3_)
+                mstore(r_, c2_)
             }
         }
     }
diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index d2b32b30..f79eed01 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -1,18 +1,17 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {call, uint512} from "./bn/U512.sol";
-import {U512} from "./bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
+import {_U384} from "./backend/U384.sol";
 
 /**
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~8.025 million gas per call.
+ * this is the most efficient implementation out there, consuming ~7.767 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
- * For reference, naive implementation uses ~400 billion gas, which is 50000 times more expensive.
+ * For reference, naive implementation uses ~400 billion gas, which is ~50000 times more expensive.
  *
  * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
  */
@@ -33,20 +32,20 @@ library ECDSA384 {
     }
 
     struct _Parameters {
-        uint512 a;
-        uint512 b;
-        uint512 gx;
-        uint512 gy;
-        uint512 p;
-        uint512 n;
-        uint512 lowSmax;
+        uint256 a;
+        uint256 b;
+        uint256 gx;
+        uint256 gy;
+        uint256 p;
+        uint256 n;
+        uint256 lowSmax;
     }
 
     struct _Inputs {
-        uint512 r;
-        uint512 s;
-        uint512 x;
-        uint512 y;
+        uint256 r;
+        uint256 s;
+        uint256 x;
+        uint256 y;
     }
 
     /**
@@ -68,27 +67,27 @@ library ECDSA384 {
         unchecked {
             _Inputs memory inputs_;
 
-            (inputs_.r, inputs_.s) = _u512FromBytes2(signature_);
-            (inputs_.x, inputs_.y) = _u512FromBytes2(pubKey_);
+            (inputs_.r, inputs_.s) = _u384FromBytes2(signature_);
+            (inputs_.x, inputs_.y) = _u384FromBytes2(pubKey_);
 
             _Parameters memory params_ = _Parameters({
-                a: U512.fromBytes(curveParams_.a),
-                b: U512.fromBytes(curveParams_.b),
-                gx: U512.fromBytes(curveParams_.gx),
-                gy: U512.fromBytes(curveParams_.gy),
-                p: U512.fromBytes(curveParams_.p),
-                n: U512.fromBytes(curveParams_.n),
-                lowSmax: U512.fromBytes(curveParams_.lowSmax)
+                a: _U384.fromBytes(curveParams_.a),
+                b: _U384.fromBytes(curveParams_.b),
+                gx: _U384.fromBytes(curveParams_.gx),
+                gy: _U384.fromBytes(curveParams_.gy),
+                p: _U384.fromBytes(curveParams_.p),
+                n: _U384.fromBytes(curveParams_.n),
+                lowSmax: _U384.fromBytes(curveParams_.lowSmax)
             });
 
-            call call_ = U512.initCall();
+            uint256 call_ = _U384.initCall(params_.p);
 
             /// accept s only from the lower part of the curve
             if (
-                U512.eqUint256(inputs_.r, 0) ||
-                U512.cmp(inputs_.r, params_.n) >= 0 ||
-                U512.eqUint256(inputs_.s, 0) ||
-                U512.cmp(inputs_.s, params_.lowSmax) > 0
+                _U384.eqUint256(inputs_.r, 0) ||
+                _U384.cmp(inputs_.r, params_.n) >= 0 ||
+                _U384.eqUint256(inputs_.s, 0) ||
+                _U384.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
             }
@@ -97,21 +96,22 @@ library ECDSA384 {
                 return false;
             }
 
-            uint512 scalar1_ = U512.moddiv(
+            uint256 scalar1_ = _U384.moddiv(
                 call_,
-                U512.fromBytes(hashedMessage_),
+                _U384.fromBytes(hashedMessage_),
                 inputs_.s,
                 params_.n
             );
-            uint512 scalar2_ = U512.moddiv(call_, inputs_.r, inputs_.s, params_.n);
+            uint256 scalar2_ = _U384.moddiv(call_, inputs_.r, inputs_.s, params_.n);
 
             {
+                uint256 three_ = _U384.fromUint256(3);
+
                 /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
-                uint512[2][64] memory points_ = _precomputePointsTable(
+                uint256[2][64] memory points_ = _precomputePointsTable(
                     call_,
                     params_.p,
-                    U512.fromUint256(2),
-                    U512.fromUint256(3),
+                    three_,
                     params_.a,
                     inputs_.x,
                     inputs_.y,
@@ -122,8 +122,7 @@ library ECDSA384 {
                 (scalar1_, ) = _doubleScalarMultiplication(
                     call_,
                     params_.p,
-                    U512.fromUint256(2),
-                    U512.fromUint256(3),
+                    three_,
                     params_.a,
                     points_,
                     scalar1_,
@@ -131,9 +130,9 @@ library ECDSA384 {
                 );
             }
 
-            U512.modAssign(call_, scalar1_, params_.n);
+            _U384.modAssign(call_, scalar1_, params_.n);
 
-            return U512.eq(scalar1_, inputs_.r);
+            return _U384.eq(scalar1_, inputs_.r);
         }
     }
 
@@ -141,35 +140,35 @@ library ECDSA384 {
      * @dev Check if a point in affine coordinates is on the curve.
      */
     function _isOnCurve(
-        call call_,
-        uint512 p_,
-        uint512 a_,
-        uint512 b_,
-        uint512 x_,
-        uint512 y_
+        uint256 call_,
+        uint256 p_,
+        uint256 a_,
+        uint256 b_,
+        uint256 x_,
+        uint256 y_
     ) private view returns (bool) {
         unchecked {
             if (
-                U512.eqUint256(x_, 0) ||
-                U512.eq(x_, p_) ||
-                U512.eqUint256(y_, 0) ||
-                U512.eq(y_, p_)
+                _U384.eqUint256(x_, 0) ||
+                _U384.eq(x_, p_) ||
+                _U384.eqUint256(y_, 0) ||
+                _U384.eq(y_, p_)
             ) {
                 return false;
             }
 
-            uint512 lhs_ = U512.modexp(call_, y_, U512.fromUint256(2), p_);
-            uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
+            uint256 lhs_ = _U384.modexp(call_, y_, 2);
+            uint256 rhs_ = _U384.modexp(call_, x_, 3);
 
-            if (!U512.eqUint256(a_, 0)) {
-                rhs_ = U512.modadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
+            if (!_U384.eqUint256(a_, 0)) {
+                rhs_ = _U384.modadd(rhs_, _U384.modmul(call_, x_, a_), p_); // x^3 + a*x
             }
 
-            if (!U512.eqUint256(b_, 0)) {
-                rhs_ = U512.modadd(call_, rhs_, b_, p_); // x^3 + a*x + b
+            if (!_U384.eqUint256(b_, 0)) {
+                rhs_ = _U384.modadd(rhs_, b_, p_); // x^3 + a*x + b
             }
 
-            return U512.eq(lhs_, rhs_);
+            return _U384.eq(lhs_, rhs_);
         }
     }
 
@@ -177,15 +176,14 @@ library ECDSA384 {
      * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
      */
     function _doubleScalarMultiplication(
-        call call_,
-        uint512 p_,
-        uint512 two_,
-        uint512 three_,
-        uint512 a_,
-        uint512[2][64] memory points_,
-        uint512 scalar1_,
-        uint512 scalar2_
-    ) private view returns (uint512 x_, uint512 y_) {
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256[2][64] memory points_,
+        uint256 scalar1_,
+        uint256 scalar2_
+    ) private view returns (uint256 x_, uint256 y_) {
         unchecked {
             uint256 mask_;
             uint256 mask1_;
@@ -198,13 +196,10 @@ library ECDSA384 {
                 mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
-                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
                     (x_, y_) = _addAffine(
                         call_,
                         p_,
-                        two_,
                         three_,
                         a_,
                         points_[mask_][0],
@@ -219,7 +214,7 @@ library ECDSA384 {
         }
     }
 
-    function _getWord(uint512 scalar_, uint256 bit_) private pure returns (uint256) {
+    function _getWord(uint256 scalar_, uint256 bit_) private pure returns (uint256) {
         unchecked {
             uint256 word_;
             if (bit_ <= 253) {
@@ -242,37 +237,111 @@ library ECDSA384 {
      * @dev Double an elliptic curve point in affine coordinates.
      */
     function _twiceAffine(
-        call call_,
-        uint512 p_,
-        uint512 two_,
-        uint512 three_,
-        uint512 a_,
-        uint512 x1_,
-        uint512 y1_
-    ) private view returns (uint512 x2_, uint512 y2_) {
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_
+    ) private view returns (uint256 x2_, uint256 y2_) {
         unchecked {
-            if (U512.isNull(x1_)) {
-                return (x2_, y2_);
+            if (x1_ == 0) {
+                return (0, 0);
             }
 
-            if (U512.eqUint256(y1_, 0)) {
-                return (x2_, y2_);
+            if (_U384.eqUint256(y1_, 0)) {
+                return (0, 0);
             }
 
-            uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
-            U512.modmulAssign(call_, m1_, three_, p_);
-            U512.modaddAssign(call_, m1_, a_, p_);
+            uint256 m1_ = _U384.modexp(call_, x1_, 2);
+            _U384.modmulAssign(call_, m1_, three_);
+            _U384.modaddAssign(m1_, a_, p_);
 
-            uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
-            U512.moddivAssign(call_, m1_, m2_, p_);
+            uint256 m2_ = _U384.modshl1(y1_, p_);
+            _U384.moddivAssign(call_, m1_, m2_);
 
-            x2_ = U512.modexp(call_, m1_, two_, p_);
-            U512.modsubAssign(call_, x2_, x1_, p_);
-            U512.modsubAssign(call_, x2_, x1_, p_);
+            x2_ = _U384.modexp(call_, m1_, 2);
+            _U384.modsubAssign(x2_, x1_, p_);
+            _U384.modsubAssign(x2_, x1_, p_);
 
-            y2_ = U512.modsub(call_, x1_, x2_, p_);
-            U512.modmulAssign(call_, y2_, m1_, p_);
-            U512.modsubAssign(call_, y2_, y1_, p_);
+            y2_ = _U384.modsub(x1_, x2_, p_);
+            _U384.modmulAssign(call_, y2_, m1_);
+            _U384.modsubAssign(y2_, y1_, p_);
+        }
+    }
+
+    /**
+     * @dev Doubles an elliptic curve point 3 times in affine coordinates.
+     */
+    function _twice3Affine(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_
+    ) private view returns (uint256 x2_, uint256 y2_) {
+        unchecked {
+            if (x1_ == 0) {
+                return (0, 0);
+            }
+
+            if (_U384.eqUint256(y1_, 0)) {
+                return (0, 0);
+            }
+
+            uint256 m1 = _U384.modexp(call_, x1_, 2);
+            _U384.modmulAssign(call_, m1, three_);
+            _U384.modaddAssign(m1, a_, p_);
+
+            uint256 m2 = _U384.modshl1(y1_, p_);
+            _U384.moddivAssign(call_, m1, m2);
+
+            x2_ = _U384.modexp(call_, m1, 2);
+            _U384.modsubAssign(x2_, x1_, p_);
+            _U384.modsubAssign(x2_, x1_, p_);
+
+            y2_ = _U384.modsub(x1_, x2_, p_);
+            _U384.modmulAssign(call_, y2_, m1);
+            _U384.modsubAssign(y2_, y1_, p_);
+
+            if (_U384.eqUint256(y2_, 0)) {
+                return (0, 0);
+            }
+
+            _U384.modexpAssignTo(call_, m1, x2_, 2);
+            _U384.modmulAssign(call_, m1, three_);
+            _U384.modaddAssign(m1, a_, p_);
+
+            _U384.modshl1AssignTo(m2, y2_, p_);
+            _U384.moddivAssign(call_, m1, m2);
+
+            _U384.modexpAssignTo(call_, x1_, m1, 2);
+            _U384.modsubAssign(x1_, x2_, p_);
+            _U384.modsubAssign(x1_, x2_, p_);
+
+            _U384.modsubAssignTo(y1_, x2_, x1_, p_);
+            _U384.modmulAssign(call_, y1_, m1);
+            _U384.modsubAssign(y1_, y2_, p_);
+
+            if (_U384.eqUint256(y1_, 0)) {
+                return (0, 0);
+            }
+
+            _U384.modexpAssignTo(call_, m1, x1_, 2);
+            _U384.modmulAssign(call_, m1, three_);
+            _U384.modaddAssign(m1, a_, p_);
+
+            _U384.modshl1AssignTo(m2, y1_, p_);
+            _U384.moddivAssign(call_, m1, m2);
+
+            _U384.modexpAssignTo(call_, x2_, m1, 2);
+            _U384.modsubAssign(x2_, x1_, p_);
+            _U384.modsubAssign(x2_, x1_, p_);
+
+            _U384.modsubAssignTo(y2_, x1_, x2_, p_);
+            _U384.modmulAssign(call_, y2_, m1);
+            _U384.modsubAssign(y2_, y1_, p_);
         }
     }
 
@@ -280,65 +349,63 @@ library ECDSA384 {
      * @dev Add two elliptic curve points in affine coordinates.
      */
     function _addAffine(
-        call call_,
-        uint512 p_,
-        uint512 two_,
-        uint512 three_,
-        uint512 a_,
-        uint512 x1_,
-        uint512 y1_,
-        uint512 x2_,
-        uint512 y2_
-    ) private view returns (uint512 x3, uint512 y3) {
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_,
+        uint256 x2_,
+        uint256 y2_
+    ) private view returns (uint256 x3, uint256 y3) {
         unchecked {
-            if (U512.isNull(x1_) || U512.isNull(x2_)) {
-                if (U512.isNull(x1_) && U512.isNull(x2_)) {
-                    return (x3, y3);
+            if (x1_ == 0 || x2_ == 0) {
+                if (x1_ == 0 && x2_ == 0) {
+                    return (0, 0);
                 }
 
                 return
-                    U512.isNull(x1_)
-                        ? (U512.copy(x2_), U512.copy(y2_))
-                        : (U512.copy(x1_), U512.copy(y1_));
+                    x1_ == 0
+                        ? (_U384.copy(x2_), _U384.copy(y2_))
+                        : (_U384.copy(x1_), _U384.copy(y1_));
             }
 
-            if (U512.eq(x1_, x2_)) {
-                if (U512.eq(y1_, y2_)) {
-                    return _twiceAffine(call_, p_, two_, three_, a_, x1_, y1_);
+            if (_U384.eq(x1_, x2_)) {
+                if (_U384.eq(y1_, y2_)) {
+                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
                 }
 
-                return (x3, y3);
+                return (0, 0);
             }
 
-            uint512 m1_ = U512.modsub(call_, y1_, y2_, p_);
-            uint512 m2_ = U512.modsub(call_, x1_, x2_, p_);
+            uint256 m1_ = _U384.modsub(y1_, y2_, p_);
+            uint256 m2_ = _U384.modsub(x1_, x2_, p_);
 
-            U512.moddivAssign(call_, m1_, m2_, p_);
+            _U384.moddivAssign(call_, m1_, m2_);
 
-            x3 = U512.modexp(call_, m1_, two_, p_);
-            U512.modsubAssign(call_, x3, x1_, p_);
-            U512.modsubAssign(call_, x3, x2_, p_);
+            x3 = _U384.modexp(call_, m1_, 2);
+            _U384.modsubAssign(x3, x1_, p_);
+            _U384.modsubAssign(x3, x2_, p_);
 
-            y3 = U512.modsub(call_, x1_, x3, p_);
-            U512.modmulAssign(call_, y3, m1_, p_);
-            U512.modsubAssign(call_, y3, y1_, p_);
+            y3 = _U384.modsub(x1_, x3, p_);
+            _U384.modmulAssign(call_, y3, m1_);
+            _U384.modsubAssign(y3, y1_, p_);
         }
     }
 
     function _precomputePointsTable(
-        call call_,
-        uint512 p_,
-        uint512 two_,
-        uint512 three_,
-        uint512 a_,
-        uint512 hx_,
-        uint512 hy_,
-        uint512 gx_,
-        uint512 gy_
-    ) private view returns (uint512[2][64] memory points_) {
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 hx_,
+        uint256 hy_,
+        uint256 gx_,
+        uint256 gy_
+    ) private view returns (uint256[2][64] memory points_) {
         unchecked {
-            (points_[0x01][0], points_[0x01][1]) = (U512.copy(hx_), U512.copy(hy_));
-            (points_[0x08][0], points_[0x08][1]) = (U512.copy(gx_), U512.copy(gy_));
+            (points_[0x01][0], points_[0x01][1]) = (_U384.copy(hx_), _U384.copy(hy_));
+            (points_[0x08][0], points_[0x08][1]) = (_U384.copy(gx_), _U384.copy(gy_));
 
             for (uint256 i = 0; i < 8; ++i) {
                 for (uint256 j = 0; j < 8; ++j) {
@@ -346,31 +413,31 @@ library ECDSA384 {
                         continue;
                     }
 
-                    uint256 maskTo = (i << 3) | j;
+                    uint256[2] memory pointTo_ = points_[(i << 3) | j];
 
                     if (i != 0) {
-                        uint256 maskFrom = ((i - 1) << 3) | j;
+                        uint256[2] memory pointFrom_ = points_[((i - 1) << 3) | j];
 
-                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
+                        (pointTo_[0], pointTo_[1]) = _addAffine(
                             call_,
                             p_,
-                            two_,
                             three_,
                             a_,
-                            points_[maskFrom][0],
-                            points_[maskFrom][1],
+                            pointFrom_[0],
+                            pointFrom_[1],
                             gx_,
                             gy_
                         );
                     } else {
-                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
+                        uint256[2] memory pointFrom_ = points_[(i << 3) | (j - 1)];
+
+                        (pointTo_[0], pointTo_[1]) = _addAffine(
                             call_,
                             p_,
-                            two_,
                             three_,
                             a_,
-                            points_[(i << 3) | (j - 1)][0],
-                            points_[(i << 3) | (j - 1)][1],
+                            pointFrom_[0],
+                            pointFrom_[1],
                             hx_,
                             hy_
                         );
@@ -383,9 +450,9 @@ library ECDSA384 {
     }
 
     /**
-     * @dev Convert 96 bytes to two 512-bit unsigned integers.
+     * @dev Convert 96 bytes to two 384-bit unsigned integers.
      */
-    function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
+    function _u384FromBytes2(bytes memory bytes_) private view returns (uint256, uint256) {
         unchecked {
             bytes memory lhs_ = new bytes(48);
             bytes memory rhs_ = new bytes(48);
@@ -393,7 +460,7 @@ library ECDSA384 {
             MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
             MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48);
 
-            return (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
+            return (_U384.fromBytes(lhs_), _U384.fromBytes(rhs_));
         }
     }
 }
diff --git a/contracts/libs/crypto/backend/U384.sol b/contracts/libs/crypto/backend/U384.sol
new file mode 100644
index 00000000..31cddb7f
--- /dev/null
+++ b/contracts/libs/crypto/backend/U384.sol
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+/**
+ * @notice Low-level utility library that implements unsigned 384-bit arithmetics.
+ *
+ * Serves for internal purposes only.
+ */
+library _U384 {
+    uint256 private constant _UINT384_ALLOCATION = 64;
+    uint256 private constant _CALL_ALLOCATION = 4 * 288;
+    uint256 private constant _MUL_OFFSET = 288;
+    uint256 private constant _EXP_OFFSET = 2 * 288;
+    uint256 private constant _INV_OFFSET = 3 * 288;
+
+    function initCall(uint256 m_) internal pure returns (uint256 handler_) {
+        unchecked {
+            handler_ = _allocate(_CALL_ALLOCATION); // four 288-byte buffers, three preset below
+            // The exponent slot of the inversion buffer is preset to m_ - 2.
+            _sub(m_, fromUint256(2), handler_ + _INV_OFFSET + 0xA0);
+
+            assembly {
+                let call_ := add(handler_, _MUL_OFFSET)
+                // Multiplication: 96-byte base, 32-byte exponent fixed to 1, 64-byte modulus m_.
+                mstore(call_, 0x60)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xC0, call_), 0x01)
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                call_ := add(handler_, _EXP_OFFSET)
+                // Exponentiation: 64-byte base, 32-byte exponent, 64-byte modulus m_.
+                mstore(call_, 0x40)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xC0, call_), mload(m_))
+                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+
+                call_ := add(handler_, _INV_OFFSET)
+                // Inversion: 64-byte base, 64-byte exponent (m_ - 2, see above), 64-byte modulus m_.
+                mstore(call_, 0x40)
+                mstore(add(0x20, call_), 0x40)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+            }
+        }
+    }
+
+    function fromUint256(uint256 u256_) internal pure returns (uint256 handler_) {
+        unchecked {
+            handler_ = _allocate(_UINT384_ALLOCATION);
+            // Two-word slot: the word at offset 0 is zeroed, the word at offset 0x20 holds `u256_`.
+            assembly {
+                mstore(handler_, 0x00)
+                mstore(add(handler_, 0x20), u256_)
+            }
+        }
+    }
+
+    function fromBytes(bytes memory bytes_) internal view returns (uint256 handler_) {
+        unchecked {
+            assert(bytes_.length < 49);
+            // At most 48 bytes are accepted; they are right-aligned in a zeroed 64-byte slot.
+            handler_ = _allocate(_UINT384_ALLOCATION);
+
+            assembly {
+                mstore(handler_, 0)
+                mstore(add(handler_, 0x20), 0)
+                // The call to address 0x4 copies `size_` bytes to the tail of the slot (status ignored).
+                let size_ := mload(bytes_)
+                pop(
+                    staticcall(
+                        gas(),
+                        0x4,
+                        add(bytes_, 0x20),
+                        size_,
+                        add(handler_, sub(0x40, size_)),
+                        size_
+                    )
+                )
+            }
+        }
+    }
+
+    function copy(uint256 handler_) internal pure returns (uint256 handlerCopy_) {
+        unchecked {
+            handlerCopy_ = _allocate(_UINT384_ALLOCATION);
+            // Duplicate both 32-byte words of `handler_` into the freshly allocated slot.
+            assembly {
+                mstore(handlerCopy_, mload(handler_))
+                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20)))
+            }
+
+            return handlerCopy_;
+        }
+    }
+
+    function eq(uint256 a_, uint256 b_) internal pure returns (bool eq_) {
+        assembly { // true iff both 32-byte words stored at `a_` and `b_` match
+            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
+        }
+    }
+
+    function eqUint256(uint256 a_, uint256 bInteger_) internal pure returns (bool eq_) {
+        assembly { // true iff the word at offset 0 is zero and the word at 0x20 equals `bInteger_`
+            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_))
+        }
+    }
+
+    function cmp(uint256 a_, uint256 b_) internal pure returns (int256) {
+        unchecked {
+            uint256 aWord_;
+            uint256 bWord_;
+            // Returns 1 if a > b, -1 if a < b, 0 if equal; the words at offset 0 decide first.
+            assembly {
+                aWord_ := mload(a_)
+                bWord_ := mload(b_)
+            }
+
+            if (aWord_ > bWord_) {
+                return 1;
+            }
+
+            if (aWord_ < bWord_) {
+                return -1;
+            }
+            // The words at offset 0 are equal, so the words at offset 0x20 decide.
+            assembly {
+                aWord_ := mload(add(a_, 0x20))
+                bWord_ := mload(add(b_, 0x20))
+            }
+
+            if (aWord_ > bWord_) {
+                return 1;
+            }
+
+            if (aWord_ < bWord_) {
+                return -1;
+            }
+
+            return 0;
+        }
+    }
+
+    function modAssign(uint256 call_, uint256 a_, uint256 m_) internal view {
+        assembly { // a_ = a_^1 mod m_, computed in place by the precompile at address 0x5
+            mstore(call_, 0x40)
+            mstore(add(0x20, call_), 0x20)
+            mstore(add(0x40, call_), 0x40)
+            mstore(add(0x60, call_), mload(a_))
+            mstore(add(0x80, call_), mload(add(a_, 0x20)))
+            mstore(add(0xA0, call_), 0x01)
+            mstore(add(0xC0, call_), mload(m_))
+            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+            // 0x100 input bytes: three length words, 64-byte base, 32-byte exponent, 64-byte modulus.
+            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40))
+        }
+    }
+
+    function modexp(
+        uint256 call_,
+        uint256 b_,
+        uint256 eInteger_
+    ) internal view returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+            // r_ = b_^eInteger_ modulo the modulus that initCall stored in the exponentiation buffer.
+            assembly {
+                call_ := add(call_, _EXP_OFFSET)
+                // Only base and exponent are rewritten; the length words and modulus are preset.
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0xA0, call_), eInteger_)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
+            }
+
+            return r_;
+        }
+    }
+
+    function modexpAssignTo(
+        uint256 call_,
+        uint256 to_,
+        uint256 b_,
+        uint256 eInteger_
+    ) internal view {
+        assembly {
+            call_ := add(call_, _EXP_OFFSET)
+            // Same call as modexp, but the 64-byte result is written to the existing slot `to_`.
+            mstore(add(0x60, call_), mload(b_))
+            mstore(add(0x80, call_), mload(add(b_, 0x20)))
+            mstore(add(0xA0, call_), eInteger_)
+
+            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40))
+        }
+    }
+
+    function modadd(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+            // r_ = a_ + b_, followed by a single conditional subtraction of m_.
+            _add(a_, b_, r_);
+
+            if (cmp(r_, m_) >= 0) {
+                _subFrom(r_, m_);
+            }
+
+            return r_;
+        }
+    }
+
+    function modaddAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
+        unchecked {
+            _addTo(a_, b_);
+            // In-place variant of modadd: m_ is subtracted once if the sum reached it.
+            if (cmp(a_, m_) >= 0) {
+                return _subFrom(a_, m_);
+            }
+        }
+    }
+
+    function modmul(uint256 call_, uint256 a_, uint256 b_) internal view returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+            // The 96-byte product becomes the base of the multiplication buffer (exponent preset to 1).
+            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60);
+
+            assembly {
+                call_ := add(call_, _MUL_OFFSET)
+                // The call to address 0x5 reduces the product modulo the preset modulus into r_.
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
+
+            return r_;
+        }
+    }
+
+    function modmulAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
+        unchecked {
+            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60);
+            // In-place variant of modmul: the reduced product overwrites `a_`.
+            assembly {
+                call_ := add(call_, _MUL_OFFSET)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40))
+            }
+        }
+    }
+
+    function modsub(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+            // r_ = a_ - b_ when a_ >= b_, otherwise (a_ + m_) - b_.
+            if (cmp(a_, b_) >= 0) {
+                _sub(a_, b_, r_);
+                return r_;
+            }
+
+            _add(a_, m_, r_);
+            _subFrom(r_, b_);
+        }
+    }
+
+    function modsubAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _subFrom(a_, b_);
+                return;
+            }
+            // a_ < b_: add m_ to a_ before subtracting b_ (in-place variant of modsub).
+            _addTo(a_, m_);
+            _subFrom(a_, b_);
+        }
+    }
+
+    function modsubAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure {
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _sub(a_, b_, to_);
+                return;
+            }
+            // a_ < b_: to_ = (a_ + m_) - b_; the result goes to the existing slot `to_`.
+            _add(a_, m_, to_);
+            _subFrom(to_, b_);
+        }
+    }
+
+    function modshl1(uint256 a_, uint256 m_) internal pure returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+            // r_ = 2 * a_, followed by a single conditional subtraction of m_.
+            _shl1(a_, r_);
+
+            if (cmp(r_, m_) >= 0) {
+                _subFrom(r_, m_);
+            }
+
+            return r_;
+        }
+    }
+
+    function modshl1AssignTo(uint256 to_, uint256 a_, uint256 m_) internal pure {
+        unchecked {
+            _shl1(a_, to_);
+            // Same as modshl1, but the result is written to the existing slot `to_`.
+            if (cmp(to_, m_) >= 0) {
+                _subFrom(to_, m_);
+            }
+        }
+    }
+
+    /// @dev Stores modinv into `b_` and moddiv into `a_`; the inverse is `b_^(m - 2)` for the initCall modulus.
+    function moddivAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
+        unchecked {
+            assembly {
+                call_ := add(call_, _INV_OFFSET)
+                // Only the base is rewritten; exponent (m - 2) and modulus were preset by initCall.
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40))
+            }
+            // `call_` was advanced inside the assembly block, so the buffer base is restored here.
+            modmulAssign(call_ - _INV_OFFSET, a_, b_);
+        }
+    }
+
+    function moddiv(
+        uint256 call_,
+        uint256 a_,
+        uint256 b_,
+        uint256 m_
+    ) internal view returns (uint256 r_) {
+        unchecked {
+            r_ = modinv(call_, b_, m_);
+            // r_ now holds b_^(m_ - 2) mod m_; the 96-byte product a_ * r_ becomes the base at 0x60.
+            _mul(a_, r_, call_ + 0x60);
+
+            assembly {
+                mstore(call_, 0x60)
+                mstore(add(0x20, call_), 0x20)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0xC0, call_), 0x01)
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+                // product^1 mod m_ overwrites r_; the buffer at offset 0 of `call_` is used.
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
+        }
+    }
+
+    function modinv(uint256 call_, uint256 b_, uint256 m_) internal view returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+            // r_ = b_^(m_ - 2) mod m_, which is the inverse of b_ when m_ is prime (Fermat).
+            _sub(m_, fromUint256(2), call_ + 0xA0);
+
+            assembly {
+                mstore(call_, 0x40)
+                mstore(add(0x20, call_), 0x40)
+                mstore(add(0x40, call_), 0x40)
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+                // The 64-byte exponent written by _sub above sits at 0xA0, between base and modulus.
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
+            }
+        }
+    }
+
+    function _shl1(uint256 a_, uint256 r_) internal pure {
+        assembly {
+            let a1_ := mload(add(a_, 0x20))
+            // r_ = a_ << 1; the top bit of the word at 0x20 moves into the word at offset 0.
+            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
+            mstore(add(r_, 0x20), shl(1, a1_))
+        }
+    }
+
+    function _add(uint256 a_, uint256 b_, uint256 r_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(r_, 0x20), sum_)
+            // Carry out of the word at 0x20: the wrapped sum is smaller than the operand.
+            sum_ := gt(aWord_, sum_)
+            sum_ := add(sum_, add(mload(a_), mload(b_)))
+
+            mstore(r_, sum_)
+        }
+    }
+
+    function _sub(uint256 a_, uint256 b_, uint256 r_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(r_, 0x20), diff_)
+            // Borrow for the word at offset 0: the wrapped difference exceeds the minuend.
+            diff_ := gt(diff_, aWord_)
+            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+
+            mstore(r_, diff_)
+        }
+    }
+
+    function _subFrom(uint256 a_, uint256 b_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(a_, 0x20), diff_)
+            // In-place a_ -= b_; a borrow is detected when the wrapped difference exceeds the minuend.
+            diff_ := gt(diff_, aWord_)
+            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+
+            mstore(a_, diff_)
+        }
+    }
+
+    function _addTo(uint256 a_, uint256 b_) private pure {
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(a_, 0x20), sum_)
+            // In-place a_ += b_; a carry is detected when the wrapped sum is smaller than the operand.
+            sum_ := gt(aWord_, sum_)
+            sum_ := add(sum_, add(mload(a_), mload(b_)))
+
+            mstore(a_, sum_)
+        }
+    }
+
+    function _mul(uint256 a_, uint256 b_, uint256 r_) private pure {
+        unchecked {
+            assembly {
+                let a0_ := mload(a_)
+                let a1_ := mload(add(a_, 0x20))
+                let b0_ := mload(b_)
+                let b1_ := mload(add(b_, 0x20))
+                // a1_ * b1_: c3_ is the low word, c2_ the high word (from the residue mod 2^256 - 1).
+                let mm_ := mulmod(
+                    a1_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let c3_ := mul(a1_, b1_)
+                let c2_ := sub(sub(mm_, c3_), lt(mm_, c3_))
+                // a0_ * b1_ as (prod0_, prod1_), added one word higher.
+                mm_ := mulmod(
+                    a0_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let prod1_ := mul(a0_, b1_)
+                let prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
+
+                c2_ := add(c2_, prod1_)
+                let c1_ := lt(c2_, prod1_)
+                c1_ := add(c1_, prod0_)
+                // a1_ * b0_ as (prod0_, prod1_), added one word higher as well.
+                mm_ := mulmod(
+                    a1_,
+                    b0_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                prod1_ := mul(a1_, b0_)
+                prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
+
+                c2_ := add(c2_, prod1_)
+                c1_ := add(c1_, lt(c2_, prod1_))
+                c1_ := add(c1_, prod0_)
+                c1_ := add(c1_, mul(a0_, b0_))
+                // Only three words (96 bytes) are stored at r_; anything above 768 bits is dropped.
+                mstore(add(r_, 0x40), c3_)
+                mstore(add(r_, 0x20), c2_)
+                mstore(r_, c1_)
+            }
+        }
+    }
+
+    function _allocate(uint256 bytes_) private pure returns (uint256 handler_) {
+        unchecked {
+            assembly { // takes the pointer stored at 0x40 and advances it by `bytes_`; nothing is zeroed
+                handler_ := mload(0x40)
+                mstore(0x40, add(handler_, bytes_))
+            }
+
+            return handler_;
+        }
+    }
+}
diff --git a/contracts/mock/libs/crypto/ECDSA384Mock.sol b/contracts/mock/libs/crypto/ECDSA384Mock.sol
index 0d0f606e..eea369fd 100644
--- a/contracts/mock/libs/crypto/ECDSA384Mock.sol
+++ b/contracts/mock/libs/crypto/ECDSA384Mock.sol
@@ -1,9 +1,8 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {uint512} from "../../../libs/crypto/bn/U512.sol";
-import {U512} from "../../../libs/crypto/bn/U512.sol";
 import {ECDSA384} from "../../../libs/crypto/ECDSA384.sol";
+import {_U384} from "../../../libs/crypto/backend/U384.sol";
 
 contract ECDSA384Mock {
     using ECDSA384 for *;
@@ -75,8 +74,8 @@ contract ECDSA384Mock {
     }
 
     function cmpMock() external pure returns (int256 cmp_) {
-        uint512 a_;
-        uint512 b_;
+        uint256 a_;
+        uint256 b_;
 
         assembly {
             a_ := mload(0x40)
@@ -88,6 +87,6 @@ contract ECDSA384Mock {
             mstore(0x40, add(b_, 0x40))
         }
 
-        return U512.cmp(a_, b_);
+        return _U384.cmp(a_, b_);
     }
 }
diff --git a/contracts/mock/libs/crypto/bn/U512Mock.sol b/contracts/mock/libs/crypto/bn/U512Mock.sol
index b94f6564..3e4a8473 100644
--- a/contracts/mock/libs/crypto/bn/U512Mock.sol
+++ b/contracts/mock/libs/crypto/bn/U512Mock.sol
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {call} from "../../../../libs/crypto/bn/U512.sol";
-import {uint512} from "../../../../libs/crypto/bn/U512.sol";
-import {U512} from "../../../../libs/crypto/bn/U512.sol";
+import {call} from "../../../../libs/bn/U512.sol";
+import {uint512} from "../../../../libs/bn/U512.sol";
+import {U512} from "../../../../libs/bn/U512.sol";
 
 contract U512Mock {
     using U512 for *;

From 9efa52b0b707cc2d7ad897a63bfc29716e948e79 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 20 Jan 2025 18:01:35 +0200
Subject: [PATCH 14/42] wip

---
 contracts/libs/crypto/ECDSA512.sol            | 466 ++++++++++++++++
 contracts/libs/crypto/backend/U512.sol        | 498 ++++++++++++++++++
 .../mock/libs/{crypto => }/bn/U512Mock.sol    |   6 +-
 contracts/mock/libs/crypto/ECDSA512Mock.sol   |  27 +
 test/libs/{crypto => }/bn/U512.test.ts        |  25 +
 test/libs/crypto/ECDSA512.test.ts             |  34 ++
 6 files changed, 1053 insertions(+), 3 deletions(-)
 create mode 100644 contracts/libs/crypto/ECDSA512.sol
 create mode 100644 contracts/libs/crypto/backend/U512.sol
 rename contracts/mock/libs/{crypto => }/bn/U512Mock.sol (93%)
 create mode 100644 contracts/mock/libs/crypto/ECDSA512Mock.sol
 rename test/libs/{crypto => }/bn/U512.test.ts (68%)
 create mode 100644 test/libs/crypto/ECDSA512.test.ts

diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
new file mode 100644
index 00000000..0616effc
--- /dev/null
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+import {MemoryUtils} from "../utils/MemoryUtils.sol";
+import {_U512} from "./backend/U512.sol";
+
+/**
+ * @notice Cryptography module
+ *
+ * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
+ * this is the most efficient implementation out there, consuming ~7.767 million gas per call.
+ *
+ * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
+ * For reference, naive implementation uses ~400 billion gas, which is ~50000 times more expensive.
+ *
+ * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
+ */
+library ECDSA512 {
+    using MemoryUtils for *;
+
+    /**
+     * @notice 384-bit curve parameters.
+     */
+    struct Parameters {
+        bytes a;
+        bytes b;
+        bytes gx;
+        bytes gy;
+        bytes p;
+        bytes n;
+        bytes lowSmax;
+    }
+
+    struct _Parameters {
+        uint256 a;
+        uint256 b;
+        uint256 gx;
+        uint256 gy;
+        uint256 p;
+        uint256 n;
+        uint256 lowSmax;
+    }
+
+    struct _Inputs {
+        uint256 r;
+        uint256 s;
+        uint256 x;
+        uint256 y;
+    }
+
+    /**
+     * @notice The function to verify the ECDSA signature
+     * @param curveParams_ the 384-bit curve parameters. `lowSmax` is `n / 2`.
+     * @param hashedMessage_ the already hashed message to be verified.
+     * @param signature_ the ECDSA signature. Equals to `bytes(r) + bytes(s)`.
+     * @param pubKey_ the full public key of a signer. Equals to `bytes(x) + bytes(y)`.
+     *
+     * Note that signatures only from the lower part of the curve are accepted.
+     * If your `s > n / 2`, change it to `s = n - s`.
+     */
+    function verify(
+        Parameters memory curveParams_,
+        bytes memory hashedMessage_,
+        bytes memory signature_,
+        bytes memory pubKey_
+    ) internal view returns (bool) {
+        unchecked {
+            _Inputs memory inputs_;
+            (inputs_.r, inputs_.s) = _u384FromBytes2(signature_);
+            (inputs_.x, inputs_.y) = _u384FromBytes2(pubKey_);
+
+            _Parameters memory params_ = _Parameters({
+                a: _U512.fromBytes(curveParams_.a),
+                b: _U512.fromBytes(curveParams_.b),
+                gx: _U512.fromBytes(curveParams_.gx),
+                gy: _U512.fromBytes(curveParams_.gy),
+                p: _U512.fromBytes(curveParams_.p),
+                n: _U512.fromBytes(curveParams_.n),
+                lowSmax: _U512.fromBytes(curveParams_.lowSmax)
+            });
+
+            uint256 call_ = _U512.initCall(params_.p);
+
+            /// accept s only from the lower part of the curve
+            if (
+                _U512.eqUint256(inputs_.r, 0) ||
+                _U512.cmp(inputs_.r, params_.n) >= 0 ||
+                _U512.eqUint256(inputs_.s, 0) ||
+                _U512.cmp(inputs_.s, params_.lowSmax) > 0
+            ) {
+                return false;
+            }
+
+            if (!_isOnCurve(call_, params_.p, params_.a, params_.b, inputs_.x, inputs_.y)) {
+                return false;
+            }
+
+            uint256 scalar1_ = _U512.moddiv(
+                call_,
+                _U512.fromBytes(hashedMessage_),
+                inputs_.s,
+                params_.n
+            );
+            uint256 scalar2_ = _U512.moddiv(call_, inputs_.r, inputs_.s, params_.n);
+
+            {
+                uint256 three_ = _U512.fromUint256(3);
+                /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
+                uint256[2][64] memory points_ = _precomputePointsTable(
+                    call_,
+                    params_.p,
+                    three_,
+                    params_.a,
+                    inputs_.x,
+                    inputs_.y,
+                    params_.gx,
+                    params_.gy
+                );
+                (scalar1_, ) = _doubleScalarMultiplication(
+                    call_,
+                    params_.p,
+                    three_,
+                    params_.a,
+                    points_,
+                    scalar1_,
+                    scalar2_
+                );
+            }
+
+            /// a zero handler encodes the point at infinity; reject it instead of reading memory at address 0
+            if (scalar1_ == 0) {
+                return false;
+            }
+            _U512.modAssign(call_, scalar1_, params_.n);
+            return _U512.eq(scalar1_, inputs_.r);
+        }
+    }
+
+    /**
+     * @dev Check if an affine point with coordinates from `[1, p - 1]` satisfies `y^2 = x^3 + a*x + b (mod p)`.
+     */
+    function _isOnCurve(
+        uint256 call_,
+        uint256 p_,
+        uint256 a_,
+        uint256 b_,
+        uint256 x_,
+        uint256 y_
+    ) private view returns (bool) {
+        unchecked {
+            /// coordinates `>= p` are non-canonical encodings and must not pass via implicit reduction
+            if (_U512.eqUint256(x_, 0) || _U512.cmp(x_, p_) >= 0) {
+                return false;
+            }
+
+            if (_U512.eqUint256(y_, 0) || _U512.cmp(y_, p_) >= 0) {
+                return false;
+            }
+
+            uint256 lhs_ = _U512.modexp(call_, y_, 2);
+            uint256 rhs_ = _U512.modexp(call_, x_, 3);
+
+            if (!_U512.eqUint256(a_, 0)) {
+                rhs_ = _U512.modadd(rhs_, _U512.modmul(call_, x_, a_), p_); // x^3 + a*x
+            }
+
+            if (!_U512.eqUint256(b_, 0)) {
+                rhs_ = _U512.modadd(rhs_, b_, p_); // x^3 + a*x + b
+            }
+
+            return _U512.eq(lhs_, rhs_);
+        }
+    }
+
+    /**
+     * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H,
+     * scanning both scalars in 3-bit windows from bit 383 down to bit 0.
+     *
+     * The table entry `points_[(i << 3) | j]` is selected with `i` read from `scalar1_` and
+     * `j` read from `scalar2_`. A zero handler is treated as the point at infinity.
+     */
+    function _doubleScalarMultiplication(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256[2][64] memory points_,
+        uint256 scalar1_,
+        uint256 scalar2_
+    ) private view returns (uint256 x_, uint256 y_) {
+        unchecked {
+            uint256 mask_;
+
+            for (uint256 bit = 3; bit <= 384; bit += 3) {
+                /// every window multiplies the accumulator by 8, even when both 3-bit digits are zero
+                (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
+                mask_ = (_getWord(scalar1_, 384 - bit) << 3) | _getWord(scalar2_, 384 - bit);
+
+                if (mask_ == 0) {
+                    continue;
+                }
+
+                (x_, y_) = _addAffine(
+                    call_,
+                    p_,
+                    three_,
+                    a_,
+                    points_[mask_][0],
+                    points_[mask_][1],
+                    x_,
+                    y_
+                );
+            }
+        }
+    }
+
+    function _getWord(uint256 scalar_, uint256 bit_) private pure returns (uint256) {
+        unchecked {
+            uint256 word_; // returns the 3 bits of the scalar that start at bit index `bit_`
+            if (bit_ <= 253) {
+                assembly {
+                    word_ := mload(add(scalar_, 0x20))
+                }
+                /// assumes a 64-byte big-endian value: the word at 0x20 then holds bits 0..255
+                return (word_ >> bit_) & 0x07;
+            }
+            /// under the same assumption the 32 bytes at offset 0x10 hold bits 128..383
+            assembly {
+                word_ := mload(add(scalar_, 0x10))
+            }
+
+            return (word_ >> (bit_ - 128)) & 0x07;
+        }
+    }
+
+    /**
+     * @dev Double an elliptic curve point in affine coordinates; returns `(0, 0)` for a zero handler or `y1 = 0`.
+     */
+    function _twiceAffine(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_
+    ) private view returns (uint256 x2_, uint256 y2_) {
+        unchecked {
+            if (x1_ == 0) {
+                return (0, 0);
+            }
+
+            if (_U512.eqUint256(y1_, 0)) {
+                return (0, 0);
+            }
+            /// m1 = (3 * x1^2 + a) / (2 * y1)
+            uint256 m1_ = _U512.modexp(call_, x1_, 2);
+            _U512.modmulAssign(call_, m1_, three_);
+            _U512.modaddAssign(m1_, a_, p_);
+
+            uint256 m2_ = _U512.modshl1(y1_, p_);
+            _U512.moddivAssign(call_, m1_, m2_);
+            /// x2 = m1^2 - 2 * x1
+            x2_ = _U512.modexp(call_, m1_, 2);
+            _U512.modsubAssign(x2_, x1_, p_);
+            _U512.modsubAssign(x2_, x1_, p_);
+            /// y2 = m1 * (x1 - x2) - y1
+            y2_ = _U512.modsub(x1_, x2_, p_);
+            _U512.modmulAssign(call_, y2_, m1_);
+            _U512.modsubAssign(y2_, y1_, p_);
+        }
+    }
+
+    /**
+     * @dev Doubles an elliptic curve point 3 times in affine coordinates. The memory behind `x1_` and `y1_` is overwritten by the second doubling.
+     */
+    function _twice3Affine(
+        uint256 call_, // scratch handle from `_U512.initCall` (its stored modulus is assumed to be p_ — confirm at call site)
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_, // input x; reused as the output slot of the second doubling
+        uint256 y1_ // input y; reused as the output slot of the second doubling
+    ) private view returns (uint256 x2_, uint256 y2_) {
+        unchecked {
+            if (x1_ == 0) { // zero handle (not zero value): the "no point" sentinel
+                return (0, 0);
+            }
+
+            if (_U512.eqUint256(y1_, 0)) { // slope denominator 2*y would be zero
+                return (0, 0);
+            }
+
+            uint256 m1 = _U512.modexp(call_, x1_, 2); // first doubling: (x1, y1) -> freshly allocated (x2, y2)
+            _U512.modmulAssign(call_, m1, three_);
+            _U512.modaddAssign(m1, a_, p_); // m1 = 3*x1^2 + a
+
+            uint256 m2 = _U512.modshl1(y1_, p_); // 2*y1
+            _U512.moddivAssign(call_, m1, m2); // m1 := slope; m2 := inverse of 2*y1
+
+            x2_ = _U512.modexp(call_, m1, 2);
+            _U512.modsubAssign(x2_, x1_, p_);
+            _U512.modsubAssign(x2_, x1_, p_); // x2 = m^2 - 2*x1
+
+            y2_ = _U512.modsub(x1_, x2_, p_);
+            _U512.modmulAssign(call_, y2_, m1);
+            _U512.modsubAssign(y2_, y1_, p_); // y2 = m*(x1 - x2) - y1
+
+            if (_U512.eqUint256(y2_, 0)) { // same zero-denominator guard before the second doubling
+                return (0, 0);
+            }
+
+            _U512.modexpAssignTo(call_, m1, x2_, 2); // second doubling: (x2, y2) -> written into the x1_/y1_ slots, m1/m2 slots reused
+            _U512.modmulAssign(call_, m1, three_);
+            _U512.modaddAssign(m1, a_, p_);
+
+            _U512.modshl1AssignTo(m2, y2_, p_);
+            _U512.moddivAssign(call_, m1, m2);
+
+            _U512.modexpAssignTo(call_, x1_, m1, 2); // overwrites the caller-provided x1_ memory
+            _U512.modsubAssign(x1_, x2_, p_);
+            _U512.modsubAssign(x1_, x2_, p_);
+
+            _U512.modsubAssignTo(y1_, x2_, x1_, p_); // overwrites the caller-provided y1_ memory
+            _U512.modmulAssign(call_, y1_, m1);
+            _U512.modsubAssign(y1_, y2_, p_);
+
+            if (_U512.eqUint256(y1_, 0)) { // same guard before the third doubling
+                return (0, 0);
+            }
+
+            _U512.modexpAssignTo(call_, m1, x1_, 2); // third doubling: (x1, y1) -> written back into the x2_/y2_ slots, which are returned
+            _U512.modmulAssign(call_, m1, three_);
+            _U512.modaddAssign(m1, a_, p_);
+
+            _U512.modshl1AssignTo(m2, y1_, p_);
+            _U512.moddivAssign(call_, m1, m2);
+
+            _U512.modexpAssignTo(call_, x2_, m1, 2);
+            _U512.modsubAssign(x2_, x1_, p_);
+            _U512.modsubAssign(x2_, x1_, p_);
+
+            _U512.modsubAssignTo(y2_, x1_, x2_, p_);
+            _U512.modmulAssign(call_, y2_, m1);
+            _U512.modsubAssign(y2_, y1_, p_);
+        }
+    }
+
+    /**
+     * @dev Add two elliptic curve points in affine coordinates: m = (y1 - y2) / (x1 - x2), x3 = m^2 - x1 - x2, y3 = m*(x1 - x3) - y1.
+     */
+    function _addAffine(
+        uint256 call_, // scratch handle from `_U512.initCall` (its stored modulus is assumed to be p_ — confirm at call site)
+        uint256 p_,
+        uint256 three_, // only forwarded to `_twiceAffine`
+        uint256 a_, // only forwarded to `_twiceAffine`
+        uint256 x1_,
+        uint256 y1_,
+        uint256 x2_,
+        uint256 y2_
+    ) private view returns (uint256 x3, uint256 y3) {
+        unchecked {
+            if (x1_ == 0 || x2_ == 0) { // handle comparisons: a zero handle is the "no point" sentinel
+                if (x1_ == 0 && x2_ == 0) {
+                    return (0, 0);
+                }
+
+                return
+                    x1_ == 0
+                        ? (_U512.copy(x2_), _U512.copy(y2_)) // return fresh copies so the result never aliases an input
+                        : (_U512.copy(x1_), _U512.copy(y1_));
+            }
+
+            if (_U512.eq(x1_, x2_)) { // equal x values: the slope formula below would divide by zero
+                if (_U512.eq(y1_, y2_)) { // same point: fall back to doubling
+                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
+                }
+
+                return (0, 0); // same x, different y
+            }
+
+            uint256 m1_ = _U512.modsub(y1_, y2_, p_); // y1 - y2
+            uint256 m2_ = _U512.modsub(x1_, x2_, p_); // x1 - x2
+
+            _U512.moddivAssign(call_, m1_, m2_); // m1_ := slope m; m2_ := inverse of (x1 - x2)
+
+            x3 = _U512.modexp(call_, m1_, 2); // m^2
+            _U512.modsubAssign(x3, x1_, p_);
+            _U512.modsubAssign(x3, x2_, p_); // x3 = m^2 - x1 - x2
+
+            y3 = _U512.modsub(x1_, x3, p_); // x1 - x3
+            _U512.modmulAssign(call_, y3, m1_); // m*(x1 - x3)
+            _U512.modsubAssign(y3, y1_, p_); // y3 = m*(x1 - x3) - y1
+        }
+    }
+
+    function _precomputePointsTable( // fills a 64-entry table where entry (i << 3) | j holds i*(gx_, gy_) + j*(hx_, hy_); entry 0 stays zeroed
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 hx_,
+        uint256 hy_,
+        uint256 gx_,
+        uint256 gy_
+    ) private view returns (uint256[2][64] memory points_) {
+        unchecked {
+            (points_[0x01][0], points_[0x01][1]) = (_U512.copy(hx_), _U512.copy(hy_)); // i = 0, j = 1
+            (points_[0x08][0], points_[0x08][1]) = (_U512.copy(gx_), _U512.copy(gy_)); // i = 1, j = 0
+
+            for (uint256 i = 0; i < 8; ++i) {
+                for (uint256 j = 0; j < 8; ++j) {
+                    if (i + j < 2) { // (0,0) is left empty; (0,1) and (1,0) were seeded above
+                        continue;
+                    }
+
+                    uint256[2] memory pointTo_ = points_[(i << 3) | j]; // memory reference: writes below land in the table
+
+                    if (i != 0) {
+                        uint256[2] memory pointFrom_ = points_[((i - 1) << 3) | j]; // entry from the previous i, same j
+
+                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                            call_,
+                            p_,
+                            three_,
+                            a_,
+                            pointFrom_[0],
+                            pointFrom_[1],
+                            gx_,
+                            gy_
+                        );
+                    } else {
+                        uint256[2] memory pointFrom_ = points_[(i << 3) | (j - 1)]; // i == 0 row: previous j, built by repeated addition of (hx_, hy_)
+
+                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                            call_,
+                            p_,
+                            three_,
+                            a_,
+                            pointFrom_[0],
+                            pointFrom_[1],
+                            hx_,
+                            hy_
+                        );
+                    }
+                }
+            }
+
+            return points_;
+        }
+    }
+
+    /**
+     * @dev Convert 96 bytes to two 384-bit unsigned integers: bytes [0, 48) become the first handle, bytes [48, 96) the second.
+     */
+    function _u384FromBytes2(bytes memory bytes_) private view returns (uint256, uint256) {
+        unchecked {
+            bytes memory lhs_ = new bytes(48);
+            bytes memory rhs_ = new bytes(48);
+
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48); // NOTE(review): no length check on bytes_ is visible here — confirm callers guarantee at least 96 bytes
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48);
+
+            return (_U512.fromBytes(lhs_), _U512.fromBytes(rhs_));
+        }
+    }
+}
diff --git a/contracts/libs/crypto/backend/U512.sol b/contracts/libs/crypto/backend/U512.sol
new file mode 100644
index 00000000..fe28ad68
--- /dev/null
+++ b/contracts/libs/crypto/backend/U512.sol
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+/**
+ * @notice Low-level utility library that implements unsigned 384-bit arithmetic on values stored in 64-byte memory slots.
+ *
+ * For internal use only.
+ */
+library _U512 {
+    uint256 private constant _UINT384_ALLOCATION = 64;
+    uint256 private constant _CALL_ALLOCATION = 4 * 288;
+    uint256 private constant _MUL_OFFSET = 288;
+    uint256 private constant _EXP_OFFSET = 2 * 288;
+    uint256 private constant _INV_OFFSET = 3 * 288;
+
+    function initCall(uint256 m_) internal pure returns (uint256 handler_) { // allocates a scratch area and pre-fills three precompile-0x05 input templates for modulus `m_`
+        unchecked {
+            handler_ = _allocate(_CALL_ALLOCATION); // four 288-byte slots; the first one is not initialised here
+
+            _sub(m_, fromUint256(2), handler_ + _INV_OFFSET + 0xA0); // exponent field of the inversion slot := m_ - 2
+
+            assembly {
+                let call_ := add(handler_, _MUL_OFFSET) // multiplication slot
+
+                mstore(call_, 0x60) // base length: 96 bytes (the three-word product written by `_mul`)
+                mstore(add(0x20, call_), 0x20) // exponent length: 32 bytes
+                mstore(add(0x40, call_), 0x40) // modulus length: 64 bytes
+                mstore(add(0xC0, call_), 0x01) // exponent = 1, so the call just reduces the product modulo m_
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                call_ := add(handler_, _EXP_OFFSET) // exponentiation slot
+
+                mstore(call_, 0x40) // base length: 64 bytes
+                mstore(add(0x20, call_), 0x20) // exponent length: 32 bytes (filled per call)
+                mstore(add(0x40, call_), 0x40) // modulus length: 64 bytes
+                mstore(add(0xC0, call_), mload(m_))
+                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+
+                call_ := add(handler_, _INV_OFFSET) // inversion slot: b^(m_ - 2) mod m_ (equals the inverse only for prime m_ — primality is not checked here)
+
+                mstore(call_, 0x40) // base length: 64 bytes
+                mstore(add(0x20, call_), 0x40) // exponent length: 64 bytes (m_ - 2, written above)
+                mstore(add(0x40, call_), 0x40) // modulus length: 64 bytes
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+            }
+        }
+    }
+
+    function fromUint256(uint256 u256_) internal pure returns (uint256 handler_) { // allocates a 64-byte slot holding `u256_` and returns its memory address
+        unchecked {
+            handler_ = _allocate(_UINT384_ALLOCATION);
+
+            assembly {
+                mstore(handler_, 0x00) // first (high) word is zero
+                mstore(add(handler_, 0x20), u256_) // second (low) word carries the value
+            }
+        }
+    }
+
+    function fromBytes(bytes memory bytes_) internal view returns (uint256 handler_) { // copies up to 48 bytes into a new 64-byte slot, right-aligned
+        unchecked {
+            assert(bytes_.length < 49); // at most 48 bytes (384 bits)
+
+            handler_ = _allocate(_UINT384_ALLOCATION);
+
+            assembly {
+                mstore(handler_, 0) // zero both words first: `_allocate` does not clear memory
+                mstore(add(handler_, 0x20), 0)
+
+                let size_ := mload(bytes_)
+                pop( // the success flag of the call is discarded
+                    staticcall(
+                        gas(),
+                        0x4, // precompile at address 0x04, used here to copy memory
+                        add(bytes_, 0x20), // source: payload after the length word
+                        size_,
+                        add(handler_, sub(0x40, size_)), // destination: ends exactly at the end of the 64-byte slot
+                        size_
+                    )
+                )
+            }
+        }
+    }
+
+    function copy(uint256 handler_) internal pure returns (uint256 handlerCopy_) { // duplicates the 64-byte value behind `handler_` into a newly allocated slot
+        unchecked {
+            handlerCopy_ = _allocate(_UINT384_ALLOCATION);
+
+            assembly {
+                mstore(handlerCopy_, mload(handler_)) // first word
+                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20))) // second word
+            }
+
+            return handlerCopy_;
+        }
+    }
+
+    function eq(uint256 a_, uint256 b_) internal pure returns (bool eq_) { // true when both 32-byte words behind `a_` and `b_` match
+        assembly {
+            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
+        }
+    }
+
+    function eqUint256(uint256 a_, uint256 bInteger_) internal pure returns (bool eq_) { // true when the value behind `a_` equals the plain integer `bInteger_`
+        assembly {
+            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_)) // first word must be zero, second word must equal the integer
+        }
+    }
+
+    function cmp(uint256 a_, uint256 b_) internal pure returns (int256) { // three-way compare of the values behind the handles: 1 if a > b, -1 if a < b, 0 if equal
+        unchecked {
+            uint256 aWord_;
+            uint256 bWord_;
+
+            assembly {
+                aWord_ := mload(a_) // first words are compared first, so they act as the most significant part
+                bWord_ := mload(b_)
+            }
+
+            if (aWord_ > bWord_) {
+                return 1;
+            }
+
+            if (aWord_ < bWord_) {
+                return -1;
+            }
+
+            assembly {
+                aWord_ := mload(add(a_, 0x20)) // first words tie: decide on the second words
+                bWord_ := mload(add(b_, 0x20))
+            }
+
+            if (aWord_ > bWord_) {
+                return 1;
+            }
+
+            if (aWord_ < bWord_) {
+                return -1;
+            }
+
+            return 0;
+        }
+    }
+
+    function modAssign(uint256 call_, uint256 a_, uint256 m_) internal view { // a := a mod m, computed as a^1 mod m via precompile 0x05; uses the first slot of `call_` as scratch
+        assembly {
+            mstore(call_, 0x40) // base length: 64 bytes
+            mstore(add(0x20, call_), 0x20) // exponent length: 32 bytes
+            mstore(add(0x40, call_), 0x40) // modulus length: 64 bytes
+            mstore(add(0x60, call_), mload(a_))
+            mstore(add(0x80, call_), mload(add(a_, 0x20)))
+            mstore(add(0xA0, call_), 0x01) // exponent = 1
+            mstore(add(0xC0, call_), mload(m_))
+            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
+
+            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40)) // result overwrites a_; the success flag is discarded
+        }
+    }
+
+    function modexp( // returns a new handle holding b^eInteger mod (the modulus `initCall` stored in the exponentiation slot)
+        uint256 call_,
+        uint256 b_,
+        uint256 eInteger_
+    ) internal view returns (uint256 r_) {
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+
+            assembly {
+                call_ := add(call_, _EXP_OFFSET) // lengths and modulus in this slot were pre-filled by `initCall`
+
+                mstore(add(0x60, call_), mload(b_)) // base
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0xA0, call_), eInteger_) // 32-byte exponent
+
+                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40)) // success flag is discarded
+            }
+
+            return r_;
+        }
+    }
+
+    function modexpAssignTo( // same as `modexp`, but writes the result into the existing slot `to_` instead of allocating
+        uint256 call_,
+        uint256 to_,
+        uint256 b_,
+        uint256 eInteger_
+    ) internal view {
+        assembly {
+            call_ := add(call_, _EXP_OFFSET) // lengths and modulus in this slot were pre-filled by `initCall`
+
+            mstore(add(0x60, call_), mload(b_)) // base is copied into the call buffer before the call, so `to_` may alias `b_`
+            mstore(add(0x80, call_), mload(add(b_, 0x20)))
+            mstore(add(0xA0, call_), eInteger_)
+
+            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40)) // success flag is discarded
+        }
+    }
+
+    function modadd(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) { // returns a new handle holding a + b with at most one subtraction of m
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+
+            _add(a_, b_, r_);
+
+            if (cmp(r_, m_) >= 0) { // NOTE(review): a single conditional subtraction only reduces fully when a_, b_ < m_ — confirm callers guarantee that
+                _subFrom(r_, m_);
+            }
+
+            return r_;
+        }
+    }
+
+    function modaddAssign(uint256 a_, uint256 b_, uint256 m_) internal pure { // in-place variant of `modadd`: a := a + b, minus m once if the sum is >= m
+        unchecked {
+            _addTo(a_, b_);
+
+            if (cmp(a_, m_) >= 0) { // NOTE(review): presumably relies on a_, b_ < m_ on entry — confirm
+                return _subFrom(a_, m_);
+            }
+        }
+    }
+
+    function modmul(uint256 call_, uint256 a_, uint256 b_) internal view returns (uint256 r_) { // returns a new handle holding a * b mod (the modulus `initCall` stored in the multiplication slot)
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+
+            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60); // three-word product goes straight into the base field of the multiplication slot
+
+            assembly {
+                call_ := add(call_, _MUL_OFFSET)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40)) // product^1 mod m; success flag is discarded
+            }
+
+            return r_;
+        }
+    }
+
+    function modmulAssign(uint256 call_, uint256 a_, uint256 b_) internal view { // in-place variant of `modmul`: a := a * b mod (modulus stored in `call_`)
+        unchecked {
+            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60); // product is written to the call buffer, so overwriting a_ afterwards is safe
+
+            assembly {
+                call_ := add(call_, _MUL_OFFSET)
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40)) // success flag is discarded
+            }
+        }
+    }
+
+    function modsub(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) { // returns a new handle holding a - b, adding m first when a < b
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+
+            if (cmp(a_, b_) >= 0) { // no wrap-around needed
+                _sub(a_, b_, r_);
+                return r_;
+            }
+
+            _add(a_, m_, r_); // a < b: compute (a + m) - b; NOTE(review): reduced only if a_, b_ < m_ on entry — confirm
+            _subFrom(r_, b_);
+        }
+    }
+
+    function modsubAssign(uint256 a_, uint256 b_, uint256 m_) internal pure { // in-place variant of `modsub`: a := a - b, adding m first when a < b
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _subFrom(a_, b_);
+                return;
+            }
+
+            _addTo(a_, m_); // a < b: (a + m) - b
+            _subFrom(a_, b_);
+        }
+    }
+
+    function modsubAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure { // variant of `modsub` that writes a - b (plus m when a < b) into the existing slot `to_`
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _sub(a_, b_, to_);
+                return;
+            }
+
+            _add(a_, m_, to_); // a < b: (a + m) - b
+            _subFrom(to_, b_);
+        }
+    }
+
+    function modshl1(uint256 a_, uint256 m_) internal pure returns (uint256 r_) { // returns a new handle holding 2 * a, minus m once if the doubled value is >= m
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+
+            _shl1(a_, r_);
+
+            if (cmp(r_, m_) >= 0) { // NOTE(review): one subtraction reduces fully only when a_ < m_ — confirm callers guarantee that
+                _subFrom(r_, m_);
+            }
+
+            return r_;
+        }
+    }
+
+    function modshl1AssignTo(uint256 to_, uint256 a_, uint256 m_) internal pure { // variant of `modshl1` that writes the result into the existing slot `to_`
+        unchecked {
+            _shl1(a_, to_);
+
+            if (cmp(to_, m_) >= 0) { // single conditional subtraction, as in `modshl1`
+                _subFrom(to_, m_);
+            }
+        }
+    }
+
+    /// @dev Stores modinv into `b_` and moddiv into `a_`. Both inputs are overwritten; the modulus is the one `initCall` stored in `call_`.
+    function moddivAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
+        unchecked {
+            assembly {
+                call_ := add(call_, _INV_OFFSET) // assignment persists after the assembly block; undone below
+
+                mstore(add(0x60, call_), mload(b_)) // base := b; exponent (m - 2) and modulus were pre-filled by `initCall`
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40)) // b := b^(m - 2) mod m; success flag is discarded
+            }
+
+            modmulAssign(call_ - _INV_OFFSET, a_, b_); // a := a * b^(m - 2) mod m, using the original scratch handle
+        }
+    }
+
+    function moddiv( // returns a new handle holding a * b^(m - 2) mod m for an explicit modulus `m_`; uses the first slot of `call_` as scratch
+        uint256 call_,
+        uint256 a_,
+        uint256 b_,
+        uint256 m_
+    ) internal view returns (uint256 r_) {
+        unchecked {
+            r_ = modinv(call_, b_, m_); // r := b^(m - 2) mod m
+
+            _mul(a_, r_, call_ + 0x60); // three-word product a * r becomes the base of the next call
+
+            assembly {
+                mstore(call_, 0x60) // base length: 96 bytes
+                mstore(add(0x20, call_), 0x20) // exponent length: 32 bytes
+                mstore(add(0x40, call_), 0x40) // modulus length: 64 bytes
+                mstore(add(0xC0, call_), 0x01) // exponent = 1: plain reduction modulo m
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40)) // overwrites r_ with the reduced product; success flag is discarded
+            }
+        }
+    }
+
+    function modinv(uint256 call_, uint256 b_, uint256 m_) internal view returns (uint256 r_) { // returns a new handle holding b^(m - 2) mod m (the inverse of b only for prime m — primality is not checked here)
+        unchecked {
+            r_ = _allocate(_UINT384_ALLOCATION);
+
+            _sub(m_, fromUint256(2), call_ + 0xA0); // exponent field := m - 2
+
+            assembly {
+                mstore(call_, 0x40) // base length: 64 bytes
+                mstore(add(0x20, call_), 0x40) // exponent length: 64 bytes
+                mstore(add(0x40, call_), 0x40) // modulus length: 64 bytes
+                mstore(add(0x60, call_), mload(b_))
+                mstore(add(0x80, call_), mload(add(b_, 0x20)))
+                mstore(add(0xE0, call_), mload(m_))
+                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40)) // success flag is discarded
+            }
+        }
+    }
+
+    function _shl1(uint256 a_, uint256 r_) internal pure { // r := a << 1 across both words; the top bit of the first word is dropped
+        assembly {
+            let a1_ := mload(add(a_, 0x20)) // second word, read before any store so r_ may alias a_
+
+            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_))) // first word shifted, with the second word's top bit carried in
+            mstore(add(r_, 0x20), shl(1, a1_))
+        }
+    }
+
+    function _add(uint256 a_, uint256 b_, uint256 r_) private pure { // r := a + b over two words; a carry out of the first word is dropped
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let sum_ := add(aWord_, mload(add(b_, 0x20))) // second-word sum, wrapping modulo 2^256
+
+            mstore(add(r_, 0x20), sum_)
+
+            sum_ := gt(aWord_, sum_) // carry: the wrapped sum is smaller than an operand exactly when it overflowed
+            sum_ := add(sum_, add(mload(a_), mload(b_))) // first words plus carry
+
+            mstore(r_, sum_)
+        }
+    }
+
+    function _sub(uint256 a_, uint256 b_, uint256 r_) private pure { // r := a - b over two words; wraps if a < b
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let diff_ := sub(aWord_, mload(add(b_, 0x20))) // second-word difference, wrapping modulo 2^256
+
+            mstore(add(r_, 0x20), diff_)
+
+            diff_ := gt(diff_, aWord_) // borrow: the wrapped difference exceeds the minuend exactly when it underflowed
+            diff_ := sub(sub(mload(a_), mload(b_)), diff_) // first words minus borrow
+
+            mstore(r_, diff_)
+        }
+    }
+
+    function _subFrom(uint256 a_, uint256 b_) private pure { // in-place variant of `_sub`: a := a - b
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(a_, 0x20), diff_)
+
+            diff_ := gt(diff_, aWord_) // borrow out of the second word
+            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
+
+            mstore(a_, diff_)
+        }
+    }
+
+    function _addTo(uint256 a_, uint256 b_) private pure { // in-place variant of `_add`: a := a + b
+        assembly {
+            let aWord_ := mload(add(a_, 0x20))
+            let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+            mstore(add(a_, 0x20), sum_)
+
+            sum_ := gt(aWord_, sum_) // carry out of the second word
+            sum_ := add(sum_, add(mload(a_), mload(b_)))
+
+            mstore(a_, sum_)
+        }
+    }
+
+    function _mul(uint256 a_, uint256 b_, uint256 r_) private pure { // writes the low three words (96 bytes) of a * b to r_; anything above 768 bits is dropped
+        unchecked {
+            assembly {
+                let a0_ := mload(a_) // first (high) word of a
+                let a1_ := mload(add(a_, 0x20)) // second (low) word of a
+                let b0_ := mload(b_)
+                let b1_ := mload(add(b_, 0x20))
+
+                let mm_ := mulmod( // product modulo 2^256 - 1, used to recover the high half of a 256 x 256 multiplication
+                    a1_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let c3_ := mul(a1_, b1_) // low half of a1 * b1: lowest result word
+                let c2_ := sub(sub(mm_, c3_), lt(mm_, c3_)) // high half of a1 * b1
+
+                mm_ := mulmod(
+                    a0_,
+                    b1_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                let prod1_ := mul(a0_, b1_) // low half of a0 * b1
+                let prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_)) // high half of a0 * b1
+
+                c2_ := add(c2_, prod1_)
+                let c1_ := lt(c2_, prod1_) // carry out of the middle word
+                c1_ := add(c1_, prod0_)
+
+                mm_ := mulmod(
+                    a1_,
+                    b0_,
+                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+                )
+                prod1_ := mul(a1_, b0_) // low half of a1 * b0
+                prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_)) // high half of a1 * b0
+
+                c2_ := add(c2_, prod1_)
+                c1_ := add(c1_, lt(c2_, prod1_))
+                c1_ := add(c1_, prod0_)
+                c1_ := add(c1_, mul(a0_, b0_)) // only the low half of a0 * b0 is kept
+
+                mstore(add(r_, 0x40), c3_) // result is stored most-significant word first
+                mstore(add(r_, 0x20), c2_)
+                mstore(r_, c1_)
+            }
+        }
+    }
+
+    function _allocate(uint256 bytes_) private pure returns (uint256 handler_) { // reserves `bytes_` bytes by bumping the pointer stored at 0x40; the memory is not cleared
+        unchecked {
+            assembly {
+                handler_ := mload(0x40) // current value of the pointer at 0x40 becomes the new handle
+                mstore(0x40, add(handler_, bytes_))
+            }
+
+            return handler_;
+        }
+    }
+}
diff --git a/contracts/mock/libs/crypto/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
similarity index 93%
rename from contracts/mock/libs/crypto/bn/U512Mock.sol
rename to contracts/mock/libs/bn/U512Mock.sol
index 3e4a8473..7c69ca31 100644
--- a/contracts/mock/libs/crypto/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {call} from "../../../../libs/bn/U512.sol";
-import {uint512} from "../../../../libs/bn/U512.sol";
-import {U512} from "../../../../libs/bn/U512.sol";
+import {call} from "../../../libs/bn/U512.sol";
+import {uint512} from "../../../libs/bn/U512.sol";
+import {U512} from "../../../libs/bn/U512.sol";
 
 contract U512Mock {
     using U512 for *;
diff --git a/contracts/mock/libs/crypto/ECDSA512Mock.sol b/contracts/mock/libs/crypto/ECDSA512Mock.sol
new file mode 100644
index 00000000..82a6d68e
--- /dev/null
+++ b/contracts/mock/libs/crypto/ECDSA512Mock.sol
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+import {ECDSA512} from "../../../libs/crypto/ECDSA512.sol";
+
+contract ECDSA512Mock { // test harness exposing `ECDSA512.verify` with a fixed set of curve parameters
+    using ECDSA512 for *;
+
+    ECDSA512.Parameters private _brainpoolP512r1CurveParams = // each field is a 64-byte big-endian hex literal
+        ECDSA512.Parameters({
+            a: hex"7830a3318b603b89e2327145ac234cc594cbdd8d3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94ca",
+            b: hex"3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94cadc083e67984050b75ebae5dd2809bd638016f723",
+            gx: hex"81aee4bdd82ed9645a21322e9c4c6a9385ed9f70b5d916c1b43b62eef4d0098eff3b1f78e2d0d48d50d1687b93b97d5f7c6d5047406a5e688b352209bcb9f822",
+            gy: hex"7dde385d566332ecc0eabfa9cf7822fdf209f70024a57b1aa000c55b881f8111b2dcde494a5f485e5bca4bd88a2763aed1ca2b2fa8f0540678cd1e0f3ad80892",
+            p: hex"aadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca703308717d4d9b009bc66842aecda12ae6a380e62881ff2f2d82c68528aa6056583a48f3",
+            n: hex"aadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330870553e5c414ca92619418661197fac10471db1d381085ddaddb58796829ca90069",
+            lowSmax: hex"556ecedc6df4e2459fea735719e4fe03e59846d9d9e4e9076b31ce65381984382a9f2e20a654930ca0c3308cbfd608238ed8e9c0842eed6edac3cb414e548034" // presumably floor(n / 2) — verify against `n`
+        });
+
+    function verifyBrainpoolP512r1WithoutHashing( // forwards the message bytes unchanged (no hashing in this contract) to the library's `verify`
+        bytes calldata message_,
+        bytes calldata signature_,
+        bytes calldata pubKey_
+    ) external view returns (bool) {
+        return _brainpoolP512r1CurveParams.verify(abi.encodePacked(message_), signature_, pubKey_); // `abi.encodePacked` copies the calldata message into memory
+    }
+}
diff --git a/test/libs/crypto/bn/U512.test.ts b/test/libs/bn/U512.test.ts
similarity index 68%
rename from test/libs/crypto/bn/U512.test.ts
rename to test/libs/bn/U512.test.ts
index fb4e1d1e..f9258137 100644
--- a/test/libs/crypto/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -48,6 +48,31 @@ describe.only("U512", () => {
   afterEach(reverter.revert);
 
   it("modadd test", async () => {
+    const lowSMAX =
+      BigInt(
+        "0xAADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069",
+      ) / 2n;
+    console.log(lowSMAX.toString(16));
+
+    console.log(
+      "7830A3318B603B89E2327145AC234CC594CBDD8D3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CA".toLowerCase(),
+    );
+    console.log(
+      "3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CADC083E67984050B75EBAE5DD2809BD638016F723".toLowerCase(),
+    );
+    console.log(
+      "81AEE4BDD82ED9645A21322E9C4C6A9385ED9F70B5D916C1B43B62EEF4D0098EFF3B1F78E2D0D48D50D1687B93B97D5F7C6D5047406A5E688B352209BCB9F822".toLowerCase(),
+    );
+    console.log(
+      "7DDE385D566332ECC0EABFA9CF7822FDF209F70024A57B1AA000C55B881F8111B2DCDE494A5F485E5BCA4BD88A2763AED1CA2B2FA8F0540678CD1E0F3AD80892".toLowerCase(),
+    );
+    console.log(
+      "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA703308717D4D9B009BC66842AECDA12AE6A380E62881FF2F2D82C68528AA6056583A48F3".toLowerCase(),
+    );
+    console.log(
+      "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069".toLowerCase(),
+    );
+
     for (let i = 0; i < 100; ++i) {
       const a = randomU512();
       const b = randomU512();
diff --git a/test/libs/crypto/ECDSA512.test.ts b/test/libs/crypto/ECDSA512.test.ts
new file mode 100644
index 00000000..5b235a5d
--- /dev/null
+++ b/test/libs/crypto/ECDSA512.test.ts
@@ -0,0 +1,34 @@
+import { ethers } from "hardhat";
+import { expect } from "chai";
+import { Reverter } from "@/test/helpers/reverter";
+
+import { ECDSA512Mock } from "@ethers-v6";
+
+describe("ECDSA512", () => { // integration test: deploys ECDSA512Mock and checks one brainpoolP512r1 signature
+  const reverter = new Reverter();
+
+  let ecdsa512: ECDSA512Mock;
+
+  before(async () => {
+    const ECDSA512Mock = await ethers.getContractFactory("ECDSA512Mock");
+
+    ecdsa512 = await ECDSA512Mock.deploy();
+
+    await reverter.snapshot(); // snapshot taken once after deployment
+  });
+
+  afterEach(reverter.revert); // roll back to the snapshot after every test
+
+  describe("brainpoolP512r1", () => { // no `.only` here: an exclusive suite makes Mocha skip every suite that is not marked `.only`
+    const signature =
+      "0x0bd2593447cc6c02caf99d60418dd42e9a194c910e6755ed0c7059acac656b04ccfe1e8348462ee43066823aee2fed7ca012e9890dfb69866d7ae88b6506f9c744b42304e693796618d090dbcb2a2551c3cb78534611e61fd9d1a5c0938b5b8ec6ed53d2d28999eabbd8e7792d167fcf582492403a6a0f7cc94c73a28fb76b71";
+    const pubKey =
+      "0x67cea1bedf84cbdcba69a05bb2ce3a2d1c9d911d236c480929a16ad697b45a6ca127079fe8d7868671e28ef33bdf9319e2e51c84b190ac5c91b51baf0a980ba500a7e79006194b5378f65cbe625ef2c47c64e56040d873b995b5b1ebaa4a6ce971da164391ff619af3bcfc71c5e1ad27ee0e859c2943e2de8ef7c43d3c976e9b";
+    const message =
+      "0x43f800fbeaf9238c58af795bcdad04bc49cd850c394d3382953356b023210281757b30e19218a37cbd612086fbc158caa8b4e1acb2ec00837e5d941f342fb3cc";
+
+    it("should verify the signature", async () => {
+      expect(await ecdsa512.verifyBrainpoolP512r1WithoutHashing(message, signature, pubKey)).to.be.true; // the message is passed as-is; the mock does not hash it
+    });
+  });
+});

From 2c5dd2d18f75c259113b9e175846ebe3392fad12 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 20 Jan 2025 19:42:53 +0200
Subject: [PATCH 15/42] added test vectors, tests are failed

---
 contracts/libs/crypto/ECDSA512.sol | 396 +++++++++++++----------------
 test/libs/bn/U512.test.ts          |  27 +-
 2 files changed, 174 insertions(+), 249 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index 0616effc..ea882487 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -1,17 +1,18 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
+import {call, uint512} from "../bn/U512.sol";
+import {U512} from "../bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
-import {_U512} from "./backend/U512.sol";
 
 /**
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~7.767 million gas per call.
+ * this is the most efficient implementation out there, consuming ~8.025 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
- * For reference, naive implementation uses ~400 billion gas, which is ~50000 times more expensive.
+ * For reference, naive implementation uses ~400 billion gas, which is 50000 times more expensive.
  *
  * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
  */
@@ -32,20 +33,20 @@ library ECDSA512 {
     }
 
     struct _Parameters {
-        uint256 a;
-        uint256 b;
-        uint256 gx;
-        uint256 gy;
-        uint256 p;
-        uint256 n;
-        uint256 lowSmax;
+        uint512 a;
+        uint512 b;
+        uint512 gx;
+        uint512 gy;
+        uint512 p;
+        uint512 n;
+        uint512 lowSmax;
     }
 
     struct _Inputs {
-        uint256 r;
-        uint256 s;
-        uint256 x;
-        uint256 y;
+        uint512 r;
+        uint512 s;
+        uint512 x;
+        uint512 y;
     }
 
     /**
@@ -67,27 +68,27 @@ library ECDSA512 {
         unchecked {
             _Inputs memory inputs_;
 
-            (inputs_.r, inputs_.s) = _u384FromBytes2(signature_);
-            (inputs_.x, inputs_.y) = _u384FromBytes2(pubKey_);
+            (inputs_.r, inputs_.s) = _u512FromBytes2(signature_);
+            (inputs_.x, inputs_.y) = _u512FromBytes2(pubKey_);
 
             _Parameters memory params_ = _Parameters({
-                a: _U512.fromBytes(curveParams_.a),
-                b: _U512.fromBytes(curveParams_.b),
-                gx: _U512.fromBytes(curveParams_.gx),
-                gy: _U512.fromBytes(curveParams_.gy),
-                p: _U512.fromBytes(curveParams_.p),
-                n: _U512.fromBytes(curveParams_.n),
-                lowSmax: _U512.fromBytes(curveParams_.lowSmax)
+                a: U512.fromBytes(curveParams_.a),
+                b: U512.fromBytes(curveParams_.b),
+                gx: U512.fromBytes(curveParams_.gx),
+                gy: U512.fromBytes(curveParams_.gy),
+                p: U512.fromBytes(curveParams_.p),
+                n: U512.fromBytes(curveParams_.n),
+                lowSmax: U512.fromBytes(curveParams_.lowSmax)
             });
 
-            uint256 call_ = _U512.initCall(params_.p);
+            call call_ = U512.initCall();
 
             /// accept s only from the lower part of the curve
             if (
-                _U512.eqUint256(inputs_.r, 0) ||
-                _U512.cmp(inputs_.r, params_.n) >= 0 ||
-                _U512.eqUint256(inputs_.s, 0) ||
-                _U512.cmp(inputs_.s, params_.lowSmax) > 0
+                U512.eqUint256(inputs_.r, 0) ||
+                U512.cmp(inputs_.r, params_.n) >= 0 ||
+                U512.eqUint256(inputs_.s, 0) ||
+                U512.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
             }
@@ -96,22 +97,21 @@ library ECDSA512 {
                 return false;
             }
 
-            uint256 scalar1_ = _U512.moddiv(
+            uint512 scalar1_ = U512.moddiv(
                 call_,
-                _U512.fromBytes(hashedMessage_),
+                U512.fromBytes(hashedMessage_),
                 inputs_.s,
                 params_.n
             );
-            uint256 scalar2_ = _U512.moddiv(call_, inputs_.r, inputs_.s, params_.n);
+            uint512 scalar2_ = U512.moddiv(call_, inputs_.r, inputs_.s, params_.n);
 
             {
-                uint256 three_ = _U512.fromUint256(3);
-
                 /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
-                uint256[2][64] memory points_ = _precomputePointsTable(
+                uint512[2][64] memory points_ = _precomputePointsTable(
                     call_,
                     params_.p,
-                    three_,
+                    U512.fromUint256(2),
+                    U512.fromUint256(3),
                     params_.a,
                     inputs_.x,
                     inputs_.y,
@@ -122,7 +122,8 @@ library ECDSA512 {
                 (scalar1_, ) = _doubleScalarMultiplication(
                     call_,
                     params_.p,
-                    three_,
+                    U512.fromUint256(2),
+                    U512.fromUint256(3),
                     params_.a,
                     points_,
                     scalar1_,
@@ -130,9 +131,9 @@ library ECDSA512 {
                 );
             }
 
-            _U512.modAssign(call_, scalar1_, params_.n);
+            U512.modAssign(call_, scalar1_, params_.n);
 
-            return _U512.eq(scalar1_, inputs_.r);
+            return U512.eq(scalar1_, inputs_.r);
         }
     }
 
@@ -140,35 +141,35 @@ library ECDSA512 {
      * @dev Check if a point in affine coordinates is on the curve.
      */
     function _isOnCurve(
-        uint256 call_,
-        uint256 p_,
-        uint256 a_,
-        uint256 b_,
-        uint256 x_,
-        uint256 y_
+        call call_,
+        uint512 p_,
+        uint512 a_,
+        uint512 b_,
+        uint512 x_,
+        uint512 y_
     ) private view returns (bool) {
         unchecked {
             if (
-                _U512.eqUint256(x_, 0) ||
-                _U512.eq(x_, p_) ||
-                _U512.eqUint256(y_, 0) ||
-                _U512.eq(y_, p_)
+                U512.eqUint256(x_, 0) ||
+                U512.eq(x_, p_) ||
+                U512.eqUint256(y_, 0) ||
+                U512.eq(y_, p_)
             ) {
                 return false;
             }
 
-            uint256 lhs_ = _U512.modexp(call_, y_, 2);
-            uint256 rhs_ = _U512.modexp(call_, x_, 3);
+            uint512 lhs_ = U512.modexp(call_, y_, U512.fromUint256(2), p_);
+            uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
 
-            if (!_U512.eqUint256(a_, 0)) {
-                rhs_ = _U512.modadd(rhs_, _U512.modmul(call_, x_, a_), p_); // x^3 + a*x
+            if (!U512.eqUint256(a_, 0)) {
+                rhs_ = U512.modadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
             }
 
-            if (!_U512.eqUint256(b_, 0)) {
-                rhs_ = _U512.modadd(rhs_, b_, p_); // x^3 + a*x + b
+            if (!U512.eqUint256(b_, 0)) {
+                rhs_ = U512.modadd(call_, rhs_, b_, p_); // x^3 + a*x + b
             }
 
-            return _U512.eq(lhs_, rhs_);
+            return U512.eq(lhs_, rhs_);
         }
     }
 
@@ -176,30 +177,43 @@ library ECDSA512 {
      * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
      */
     function _doubleScalarMultiplication(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256[2][64] memory points_,
-        uint256 scalar1_,
-        uint256 scalar2_
-    ) private view returns (uint256 x_, uint256 y_) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512[2][64] memory points_,
+        uint512 scalar1_,
+        uint512 scalar2_
+    ) private view returns (uint512 x_, uint512 y_) {
         unchecked {
             uint256 mask_;
             uint256 mask1_;
             uint256 mask2_;
 
-            for (uint256 bit = 3; bit <= 384; bit += 3) {
-                mask1_ = _getWord(scalar1_, 384 - bit);
-                mask2_ = _getWord(scalar2_, 384 - bit);
+            // consume the two leading bits up front (512 = 2 + 3 * 170); the loop below handles the 170 full 3-bit windows
+            assembly {
+                mask1_ := shr(254, mload(scalar1_))
+                mask2_ := shr(254, mload(scalar2_))
+                mask_ := or(shl(3, mask1_), mask2_)
+            }
+
+            (x_, y_) = (U512.copy(points_[mask_][0]), U512.copy(points_[mask_][1]));
+
+            for (uint256 bit = 5; bit <= 512; bit += 3) {
+                mask1_ = _getWord(scalar1_, 512 - bit);
+                mask2_ = _getWord(scalar2_, 512 - bit);
 
                 mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
-                    (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
                     (x_, y_) = _addAffine(
                         call_,
                         p_,
+                        two_,
                         three_,
                         a_,
                         points_[mask_][0],
@@ -214,7 +228,7 @@ library ECDSA512 {
         }
     }
 
-    function _getWord(uint256 scalar_, uint256 bit_) private pure returns (uint256) {
+    function _getWord(uint512 scalar_, uint256 bit_) private pure returns (uint256) {
         unchecked {
             uint256 word_;
             if (bit_ <= 253) {
@@ -225,123 +239,57 @@ library ECDSA512 {
                 return (word_ >> bit_) & 0x07;
             }
 
-            assembly {
-                word_ := mload(add(scalar_, 0x10))
-            }
-
-            return (word_ >> (bit_ - 128)) & 0x07;
-        }
-    }
+            if (bit_ <= 381) {
+                assembly {
+                    word_ := mload(add(scalar_, 0x10))
+                }
 
-    /**
-     * @dev Double an elliptic curve point in affine coordinates.
-     */
-    function _twiceAffine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_
-    ) private view returns (uint256 x2_, uint256 y2_) {
-        unchecked {
-            if (x1_ == 0) {
-                return (0, 0);
+                return (word_ >> (bit_ - 128)) & 0x07;
             }
 
-            if (_U512.eqUint256(y1_, 0)) {
-                return (0, 0);
+            assembly {
+                word_ := mload(scalar_)
             }
 
-            uint256 m1_ = _U512.modexp(call_, x1_, 2);
-            _U512.modmulAssign(call_, m1_, three_);
-            _U512.modaddAssign(m1_, a_, p_);
-
-            uint256 m2_ = _U512.modshl1(y1_, p_);
-            _U512.moddivAssign(call_, m1_, m2_);
-
-            x2_ = _U512.modexp(call_, m1_, 2);
-            _U512.modsubAssign(x2_, x1_, p_);
-            _U512.modsubAssign(x2_, x1_, p_);
-
-            y2_ = _U512.modsub(x1_, x2_, p_);
-            _U512.modmulAssign(call_, y2_, m1_);
-            _U512.modsubAssign(y2_, y1_, p_);
+            return (word_ >> (bit_ - 256)) & 0x07;
         }
     }
 
     /**
-     * @dev Doubles an elliptic curve point 3 times in affine coordinates.
+     * @dev Double an elliptic curve point in affine coordinates.
      */
-    function _twice3Affine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_
-    ) private view returns (uint256 x2_, uint256 y2_) {
+    function _twiceAffine(
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512 x1_,
+        uint512 y1_
+    ) private view returns (uint512 x2_, uint512 y2_) {
         unchecked {
-            if (x1_ == 0) {
-                return (0, 0);
-            }
-
-            if (_U512.eqUint256(y1_, 0)) {
-                return (0, 0);
-            }
-
-            uint256 m1 = _U512.modexp(call_, x1_, 2);
-            _U512.modmulAssign(call_, m1, three_);
-            _U512.modaddAssign(m1, a_, p_);
-
-            uint256 m2 = _U512.modshl1(y1_, p_);
-            _U512.moddivAssign(call_, m1, m2);
-
-            x2_ = _U512.modexp(call_, m1, 2);
-            _U512.modsubAssign(x2_, x1_, p_);
-            _U512.modsubAssign(x2_, x1_, p_);
-
-            y2_ = _U512.modsub(x1_, x2_, p_);
-            _U512.modmulAssign(call_, y2_, m1);
-            _U512.modsubAssign(y2_, y1_, p_);
-
-            if (_U512.eqUint256(y2_, 0)) {
-                return (0, 0);
+            if (U512.isNull(x1_)) {
+                return (x2_, y2_);
             }
 
-            _U512.modexpAssignTo(call_, m1, x2_, 2);
-            _U512.modmulAssign(call_, m1, three_);
-            _U512.modaddAssign(m1, a_, p_);
-
-            _U512.modshl1AssignTo(m2, y2_, p_);
-            _U512.moddivAssign(call_, m1, m2);
-
-            _U512.modexpAssignTo(call_, x1_, m1, 2);
-            _U512.modsubAssign(x1_, x2_, p_);
-            _U512.modsubAssign(x1_, x2_, p_);
-
-            _U512.modsubAssignTo(y1_, x2_, x1_, p_);
-            _U512.modmulAssign(call_, y1_, m1);
-            _U512.modsubAssign(y1_, y2_, p_);
-
-            if (_U512.eqUint256(y1_, 0)) {
-                return (0, 0);
+            if (U512.eqUint256(y1_, 0)) {
+                return (x2_, y2_);
             }
 
-            _U512.modexpAssignTo(call_, m1, x1_, 2);
-            _U512.modmulAssign(call_, m1, three_);
-            _U512.modaddAssign(m1, a_, p_);
+            uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
+            U512.modmulAssign(call_, m1_, three_, p_);
+            U512.modaddAssign(call_, m1_, a_, p_);
 
-            _U512.modshl1AssignTo(m2, y1_, p_);
-            _U512.moddivAssign(call_, m1, m2);
+            uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
+            U512.moddivAssign(call_, m1_, m2_, p_);
 
-            _U512.modexpAssignTo(call_, x2_, m1, 2);
-            _U512.modsubAssign(x2_, x1_, p_);
-            _U512.modsubAssign(x2_, x1_, p_);
+            x2_ = U512.modexp(call_, m1_, two_, p_);
+            U512.modsubAssign(call_, x2_, x1_, p_);
+            U512.modsubAssign(call_, x2_, x1_, p_);
 
-            _U512.modsubAssignTo(y2_, x1_, x2_, p_);
-            _U512.modmulAssign(call_, y2_, m1);
-            _U512.modsubAssign(y2_, y1_, p_);
+            y2_ = U512.modsub(call_, x1_, x2_, p_);
+            U512.modmulAssign(call_, y2_, m1_, p_);
+            U512.modsubAssign(call_, y2_, y1_, p_);
         }
     }
 
@@ -349,63 +297,65 @@ library ECDSA512 {
      * @dev Add two elliptic curve points in affine coordinates.
      */
     function _addAffine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_,
-        uint256 x2_,
-        uint256 y2_
-    ) private view returns (uint256 x3, uint256 y3) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512 x1_,
+        uint512 y1_,
+        uint512 x2_,
+        uint512 y2_
+    ) private view returns (uint512 x3, uint512 y3) {
         unchecked {
-            if (x1_ == 0 || x2_ == 0) {
-                if (x1_ == 0 && x2_ == 0) {
-                    return (0, 0);
+            if (U512.isNull(x1_) || U512.isNull(x2_)) {
+                if (U512.isNull(x1_) && U512.isNull(x2_)) {
+                    return (x3, y3);
                 }
 
                 return
-                    x1_ == 0
-                        ? (_U512.copy(x2_), _U512.copy(y2_))
-                        : (_U512.copy(x1_), _U512.copy(y1_));
+                    U512.isNull(x1_)
+                        ? (U512.copy(x2_), U512.copy(y2_))
+                        : (U512.copy(x1_), U512.copy(y1_));
             }
 
-            if (_U512.eq(x1_, x2_)) {
-                if (_U512.eq(y1_, y2_)) {
-                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
+            if (U512.eq(x1_, x2_)) {
+                if (U512.eq(y1_, y2_)) {
+                    return _twiceAffine(call_, p_, two_, three_, a_, x1_, y1_);
                 }
 
-                return (0, 0);
+                return (x3, y3);
             }
 
-            uint256 m1_ = _U512.modsub(y1_, y2_, p_);
-            uint256 m2_ = _U512.modsub(x1_, x2_, p_);
+            uint512 m1_ = U512.modsub(call_, y1_, y2_, p_);
+            uint512 m2_ = U512.modsub(call_, x1_, x2_, p_);
 
-            _U512.moddivAssign(call_, m1_, m2_);
+            U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x3 = _U512.modexp(call_, m1_, 2);
-            _U512.modsubAssign(x3, x1_, p_);
-            _U512.modsubAssign(x3, x2_, p_);
+            x3 = U512.modexp(call_, m1_, two_, p_);
+            U512.modsubAssign(call_, x3, x1_, p_);
+            U512.modsubAssign(call_, x3, x2_, p_);
 
-            y3 = _U512.modsub(x1_, x3, p_);
-            _U512.modmulAssign(call_, y3, m1_);
-            _U512.modsubAssign(y3, y1_, p_);
+            y3 = U512.modsub(call_, x1_, x3, p_);
+            U512.modmulAssign(call_, y3, m1_, p_);
+            U512.modsubAssign(call_, y3, y1_, p_);
         }
     }
 
     function _precomputePointsTable(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 hx_,
-        uint256 hy_,
-        uint256 gx_,
-        uint256 gy_
-    ) private view returns (uint256[2][64] memory points_) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512 hx_,
+        uint512 hy_,
+        uint512 gx_,
+        uint512 gy_
+    ) private view returns (uint512[2][64] memory points_) {
         unchecked {
-            (points_[0x01][0], points_[0x01][1]) = (_U512.copy(hx_), _U512.copy(hy_));
-            (points_[0x08][0], points_[0x08][1]) = (_U512.copy(gx_), _U512.copy(gy_));
+            (points_[0x01][0], points_[0x01][1]) = (U512.copy(hx_), U512.copy(hy_));
+            (points_[0x08][0], points_[0x08][1]) = (U512.copy(gx_), U512.copy(gy_));
 
             for (uint256 i = 0; i < 8; ++i) {
                 for (uint256 j = 0; j < 8; ++j) {
@@ -413,31 +363,31 @@ library ECDSA512 {
                         continue;
                     }
 
-                    uint256[2] memory pointTo_ = points_[(i << 3) | j];
+                    uint256 maskTo = (i << 3) | j;
 
                     if (i != 0) {
-                        uint256[2] memory pointFrom_ = points_[((i - 1) << 3) | j];
+                        uint256 maskFrom = ((i - 1) << 3) | j;
 
-                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
                             call_,
                             p_,
+                            two_,
                             three_,
                             a_,
-                            pointFrom_[0],
-                            pointFrom_[1],
+                            points_[maskFrom][0],
+                            points_[maskFrom][1],
                             gx_,
                             gy_
                         );
                     } else {
-                        uint256[2] memory pointFrom_ = points_[(i << 3) | (j - 1)];
-
-                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
                             call_,
                             p_,
+                            two_,
                             three_,
                             a_,
-                            pointFrom_[0],
-                            pointFrom_[1],
+                            points_[(i << 3) | (j - 1)][0],
+                            points_[(i << 3) | (j - 1)][1],
                             hx_,
                             hy_
                         );
@@ -450,17 +400,17 @@ library ECDSA512 {
     }
 
     /**
-     * @dev Convert 96 bytes to two 384-bit unsigned integers.
+     * @dev Convert 128 bytes to two 512-bit unsigned integers.
      */
-    function _u384FromBytes2(bytes memory bytes_) private view returns (uint256, uint256) {
+    function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
         unchecked {
-            bytes memory lhs_ = new bytes(48);
-            bytes memory rhs_ = new bytes(48);
+            bytes memory lhs_ = new bytes(64);
+            bytes memory rhs_ = new bytes(64);
 
-            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
-            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48);
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 64);
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 64, rhs_.getDataPointer(), 64);
 
-            return (_U512.fromBytes(lhs_), _U512.fromBytes(rhs_));
+            return (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
         }
     }
 }
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index f9258137..90a626a7 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe.only("U512", () => {
+describe("U512", () => {
   const reverter = new Reverter();
 
   let u512: U512Mock;
@@ -48,31 +48,6 @@ describe.only("U512", () => {
   afterEach(reverter.revert);
 
   it("modadd test", async () => {
-    const lowSMAX =
-      BigInt(
-        "0xAADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069",
-      ) / 2n;
-    console.log(lowSMAX.toString(16));
-
-    console.log(
-      "7830A3318B603B89E2327145AC234CC594CBDD8D3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CA".toLowerCase(),
-    );
-    console.log(
-      "3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CADC083E67984050B75EBAE5DD2809BD638016F723".toLowerCase(),
-    );
-    console.log(
-      "81AEE4BDD82ED9645A21322E9C4C6A9385ED9F70B5D916C1B43B62EEF4D0098EFF3B1F78E2D0D48D50D1687B93B97D5F7C6D5047406A5E688B352209BCB9F822".toLowerCase(),
-    );
-    console.log(
-      "7DDE385D566332ECC0EABFA9CF7822FDF209F70024A57B1AA000C55B881F8111B2DCDE494A5F485E5BCA4BD88A2763AED1CA2B2FA8F0540678CD1E0F3AD80892".toLowerCase(),
-    );
-    console.log(
-      "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA703308717D4D9B009BC66842AECDA12AE6A380E62881FF2F2D82C68528AA6056583A48F3".toLowerCase(),
-    );
-    console.log(
-      "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069".toLowerCase(),
-    );
-
     for (let i = 0; i < 100; ++i) {
       const a = randomU512();
       const b = randomU512();

From ce8362328ff06a60b8eed3a77e831aa6cf866984 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 14:28:24 +0200
Subject: [PATCH 16/42] fixed ecdsa512 impl 22.2kk

---
 contracts/libs/crypto/ECDSA512.sol | 9 +++++----
 test/libs/crypto/ECDSA384.test.ts  | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index ea882487..b6bbff36 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -8,7 +8,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
 /**
  * @notice Cryptography module
  *
- * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
+ * This library provides functionality for ECDSA verification over any 512-bit curve. Currently,
  * this is the most efficient implementation out there, consuming ~8.025 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
@@ -206,10 +206,11 @@ library ECDSA512 {
 
                 mask_ = (mask1_ << 3) | mask2_;
 
+                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+
                 if (mask_ != 0) {
-                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
                     (x_, y_) = _addAffine(
                         call_,
                         p_,
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index 9294b951..de29aaac 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -168,7 +168,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe.only("brainpoolP384r1", () => {
+  describe("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =

From 8f7deb49f58a966a9162e92d223df2c7fc587c70 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:04:09 +0200
Subject: [PATCH 17/42] 20.4kk

---
 contracts/libs/bn/U512.sol         | 110 ++++++++++++++++++++++++++++-
 contracts/libs/crypto/ECDSA512.sol |  26 +++----
 2 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index e44a110e..65bf8e24 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -263,6 +263,37 @@ library U512 {
         }
     }
 
+    function redadd(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _redadd(call_, a_, b_, m_, r_);
+        }
+    }
+
+    function redaddAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
+        unchecked {
+            _redadd(call_, a_, b_, m_, a_);
+        }
+    }
+
+    function redaddAssignTo(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_,
+        uint512 to_
+    ) internal pure {
+        unchecked {
+            _redadd(call_, a_, b_, m_, to_);
+        }
+    }
+
     function modsub(
         call call_,
         uint512 a_,
@@ -314,6 +345,37 @@ library U512 {
         }
     }
 
+    function redsub(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_
+    ) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _redsub(call_, a_, b_, m_, r_);
+        }
+    }
+
+    function redsubAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
+        unchecked {
+            _redsub(call_, a_, b_, m_, a_);
+        }
+    }
+
+    function redsubAssignTo(
+        call call_,
+        uint512 a_,
+        uint512 b_,
+        uint512 m_,
+        uint512 to_
+    ) internal pure {
+        unchecked {
+            _redsub(call_, a_, b_, m_, to_);
+        }
+    }
+
     function modmul(
         call call_,
         uint512 a_,
@@ -464,12 +526,12 @@ library U512 {
                 let aWord_ := mload(add(a_, 0x20))
                 let sum_ := add(aWord_, mload(add(b_, 0x20)))
 
-                mstore(r_, sum_)
+                mstore(add(r_, 0x20), sum_)
 
                 sum_ := gt(aWord_, sum_)
                 sum_ := add(sum_, add(mload(a_), mload(b_)))
 
-                mstore(add(r_, 0x20), sum_)
+                mstore(r_, sum_)
             }
         }
     }
@@ -500,6 +562,36 @@ library U512 {
         }
     }
 
+    function _redadd(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
+        unchecked {
+            uint512 buffer_ = _buffer(call_);
+            bool overflowed_;
+
+            assembly {
+                let aWord_ := mload(add(a_, 0x20))
+                let sum_ := add(aWord_, mload(add(b_, 0x20)))
+
+                mstore(add(buffer_, 0x20), sum_)
+
+                sum_ := gt(aWord_, sum_)
+                sum_ := add(sum_, add(mload(a_), mload(b_)))
+
+                mstore(buffer_, sum_)
+                overflowed_ := gt(mload(a_), sum_)
+            }
+
+            if (overflowed_ || cmp(buffer_, m_) >= 0) {
+                _sub(buffer_, m_, r_);
+                return;
+            }
+
+            assembly {
+                mstore(r_, mload(buffer_))
+                mstore(add(r_, 0x20), mload(add(buffer_, 0x20)))
+            }
+        }
+    }
+
     function _sub(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
             assembly {
@@ -545,6 +637,20 @@ library U512 {
         }
     }
 
+    function _redsub(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
+        unchecked {
+            if (cmp(a_, b_) >= 0) {
+                _sub(a_, b_, r_);
+                return;
+            }
+
+            uint512 buffer_ = _buffer(call_);
+
+            _add(a_, m_, buffer_);
+            _sub(buffer_, b_, r_);
+        }
+    }
+
     function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
             assembly {
diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index b6bbff36..a3938b04 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -162,11 +162,11 @@ library ECDSA512 {
             uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
 
             if (!U512.eqUint256(a_, 0)) {
-                rhs_ = U512.modadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
+                rhs_ = U512.redadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
             }
 
             if (!U512.eqUint256(b_, 0)) {
-                rhs_ = U512.modadd(call_, rhs_, b_, p_); // x^3 + a*x + b
+                rhs_ = U512.redadd(call_, rhs_, b_, p_); // x^3 + a*x + b
             }
 
             return U512.eq(lhs_, rhs_);
@@ -279,18 +279,18 @@ library ECDSA512 {
 
             uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
             U512.modmulAssign(call_, m1_, three_, p_);
-            U512.modaddAssign(call_, m1_, a_, p_);
+            U512.redaddAssign(call_, m1_, a_, p_);
 
             uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
             U512.moddivAssign(call_, m1_, m2_, p_);
 
             x2_ = U512.modexp(call_, m1_, two_, p_);
-            U512.modsubAssign(call_, x2_, x1_, p_);
-            U512.modsubAssign(call_, x2_, x1_, p_);
+            U512.redsubAssign(call_, x2_, x1_, p_);
+            U512.redsubAssign(call_, x2_, x1_, p_);
 
-            y2_ = U512.modsub(call_, x1_, x2_, p_);
+            y2_ = U512.redsub(call_, x1_, x2_, p_);
             U512.modmulAssign(call_, y2_, m1_, p_);
-            U512.modsubAssign(call_, y2_, y1_, p_);
+            U512.redsubAssign(call_, y2_, y1_, p_);
         }
     }
 
@@ -328,18 +328,18 @@ library ECDSA512 {
                 return (x3, y3);
             }
 
-            uint512 m1_ = U512.modsub(call_, y1_, y2_, p_);
-            uint512 m2_ = U512.modsub(call_, x1_, x2_, p_);
+            uint512 m1_ = U512.redsub(call_, y1_, y2_, p_);
+            uint512 m2_ = U512.redsub(call_, x1_, x2_, p_);
 
             U512.moddivAssign(call_, m1_, m2_, p_);
 
             x3 = U512.modexp(call_, m1_, two_, p_);
-            U512.modsubAssign(call_, x3, x1_, p_);
-            U512.modsubAssign(call_, x3, x2_, p_);
+            U512.redsubAssign(call_, x3, x1_, p_);
+            U512.redsubAssign(call_, x3, x2_, p_);
 
-            y3 = U512.modsub(call_, x1_, x3, p_);
+            y3 = U512.redsub(call_, x1_, x3, p_);
             U512.modmulAssign(call_, y3, m1_, p_);
-            U512.modsubAssign(call_, y3, y1_, p_);
+            U512.redsubAssign(call_, y3, y1_, p_);
         }
     }
 

From 007dc1c808e90bdcbb03e7e71270eb01de16b0f1 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:13:37 +0200
Subject: [PATCH 18/42] added assert

---
 contracts/libs/crypto/ECDSA512.sol | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index a3938b04..82b59bcd 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -405,6 +405,8 @@ library ECDSA512 {
      */
     function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
         unchecked {
+            assert(bytes_.length == 128);
+
             bytes memory lhs_ = new bytes(64);
             bytes memory rhs_ = new bytes(64);
 

From e650ce35e379d501cc9b8c512ff67c539cbc2ecf Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:15:50 +0200
Subject: [PATCH 19/42] small fixes

---
 contracts/libs/crypto/ECDSA384.sol | 3 ++-
 contracts/libs/crypto/ECDSA512.sol | 9 +++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index f79eed01..2548804e 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -195,8 +195,9 @@ library ECDSA384 {
 
                 mask_ = (mask1_ << 3) | mask2_;
 
+                (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
+
                 if (mask_ != 0) {
-                    (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
                     (x_, y_) = _addAffine(
                         call_,
                         p_,
diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index 82b59bcd..584e6cde 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -9,18 +9,15 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 512-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~8.025 million gas per call.
+ * this is the most efficient implementation out there, consuming ~20.2 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
- * For reference, naive implementation uses ~400 billion gas, which is 50000 times more expensive.
- *
- * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
  */
 library ECDSA512 {
     using MemoryUtils for *;
 
     /**
-     * @notice 384-bit curve parameters.
+     * @notice 512-bit curve parameters.
      */
     struct Parameters {
         bytes a;
@@ -51,7 +48,7 @@ library ECDSA512 {
 
     /**
      * @notice The function to verify the ECDSA signature
-     * @param curveParams_ the 384-bit curve parameters. `lowSmax` is `n / 2`.
+     * @param curveParams_ the 512-bit curve parameters. `lowSmax` is `n / 2`.
      * @param hashedMessage_ the already hashed message to be verified.
      * @param signature_ the ECDSA signature. Equals to `bytes(r) + bytes(s)`.
      * @param pubKey_ the full public key of a signer. Equals to `bytes(x) + bytes(y)`.

From 85684f1af06e332862baed345a41930d3dbdd109 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:24:34 +0200
Subject: [PATCH 20/42] refactored

---
 contracts/libs/crypto/ECDSA384.sol            | 364 ++++++-------
 contracts/libs/crypto/backend/U512.sol        | 498 ------------------
 contracts/libs/crypto/opt/ECDSA384.sol        | 467 ++++++++++++++++
 .../libs/crypto/{backend => opt}/U384.sol     |   0
 contracts/mock/libs/crypto/ECDSA384Mock.sol   |   9 +-
 test/libs/crypto/ECDSA384.test.ts             |   2 +-
 6 files changed, 621 insertions(+), 719 deletions(-)
 delete mode 100644 contracts/libs/crypto/backend/U512.sol
 create mode 100644 contracts/libs/crypto/opt/ECDSA384.sol
 rename contracts/libs/crypto/{backend => opt}/U384.sol (100%)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 2548804e..944b521f 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -1,19 +1,17 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
+import {call, uint512} from "../bn/U512.sol";
+import {U512} from "../bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
-import {_U384} from "./backend/U384.sol";
 
 /**
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~7.767 million gas per call.
+ * this is the most efficient implementation out there, consuming ~20.2 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
- * For reference, naive implementation uses ~400 billion gas, which is ~50000 times more expensive.
- *
- * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
  */
 library ECDSA384 {
     using MemoryUtils for *;
@@ -32,20 +30,20 @@ library ECDSA384 {
     }
 
     struct _Parameters {
-        uint256 a;
-        uint256 b;
-        uint256 gx;
-        uint256 gy;
-        uint256 p;
-        uint256 n;
-        uint256 lowSmax;
+        uint512 a;
+        uint512 b;
+        uint512 gx;
+        uint512 gy;
+        uint512 p;
+        uint512 n;
+        uint512 lowSmax;
     }
 
     struct _Inputs {
-        uint256 r;
-        uint256 s;
-        uint256 x;
-        uint256 y;
+        uint512 r;
+        uint512 s;
+        uint512 x;
+        uint512 y;
     }
 
     /**
@@ -67,27 +65,27 @@ library ECDSA384 {
         unchecked {
             _Inputs memory inputs_;
 
-            (inputs_.r, inputs_.s) = _u384FromBytes2(signature_);
-            (inputs_.x, inputs_.y) = _u384FromBytes2(pubKey_);
+            (inputs_.r, inputs_.s) = _u512FromBytes2(signature_);
+            (inputs_.x, inputs_.y) = _u512FromBytes2(pubKey_);
 
             _Parameters memory params_ = _Parameters({
-                a: _U384.fromBytes(curveParams_.a),
-                b: _U384.fromBytes(curveParams_.b),
-                gx: _U384.fromBytes(curveParams_.gx),
-                gy: _U384.fromBytes(curveParams_.gy),
-                p: _U384.fromBytes(curveParams_.p),
-                n: _U384.fromBytes(curveParams_.n),
-                lowSmax: _U384.fromBytes(curveParams_.lowSmax)
+                a: U512.fromBytes(curveParams_.a),
+                b: U512.fromBytes(curveParams_.b),
+                gx: U512.fromBytes(curveParams_.gx),
+                gy: U512.fromBytes(curveParams_.gy),
+                p: U512.fromBytes(curveParams_.p),
+                n: U512.fromBytes(curveParams_.n),
+                lowSmax: U512.fromBytes(curveParams_.lowSmax)
             });
 
-            uint256 call_ = _U384.initCall(params_.p);
+            call call_ = U512.initCall();
 
             /// accept s only from the lower part of the curve
             if (
-                _U384.eqUint256(inputs_.r, 0) ||
-                _U384.cmp(inputs_.r, params_.n) >= 0 ||
-                _U384.eqUint256(inputs_.s, 0) ||
-                _U384.cmp(inputs_.s, params_.lowSmax) > 0
+                U512.eqUint256(inputs_.r, 0) ||
+                U512.cmp(inputs_.r, params_.n) >= 0 ||
+                U512.eqUint256(inputs_.s, 0) ||
+                U512.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
             }
@@ -96,22 +94,21 @@ library ECDSA384 {
                 return false;
             }
 
-            uint256 scalar1_ = _U384.moddiv(
+            uint512 scalar1_ = U512.moddiv(
                 call_,
-                _U384.fromBytes(hashedMessage_),
+                U512.fromBytes(hashedMessage_),
                 inputs_.s,
                 params_.n
             );
-            uint256 scalar2_ = _U384.moddiv(call_, inputs_.r, inputs_.s, params_.n);
+            uint512 scalar2_ = U512.moddiv(call_, inputs_.r, inputs_.s, params_.n);
 
             {
-                uint256 three_ = _U384.fromUint256(3);
-
                 /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
-                uint256[2][64] memory points_ = _precomputePointsTable(
+                uint512[2][64] memory points_ = _precomputePointsTable(
                     call_,
                     params_.p,
-                    three_,
+                    U512.fromUint256(2),
+                    U512.fromUint256(3),
                     params_.a,
                     inputs_.x,
                     inputs_.y,
@@ -122,7 +119,8 @@ library ECDSA384 {
                 (scalar1_, ) = _doubleScalarMultiplication(
                     call_,
                     params_.p,
-                    three_,
+                    U512.fromUint256(2),
+                    U512.fromUint256(3),
                     params_.a,
                     points_,
                     scalar1_,
@@ -130,9 +128,9 @@ library ECDSA384 {
                 );
             }
 
-            _U384.modAssign(call_, scalar1_, params_.n);
+            U512.modAssign(call_, scalar1_, params_.n);
 
-            return _U384.eq(scalar1_, inputs_.r);
+            return U512.eq(scalar1_, inputs_.r);
         }
     }
 
@@ -140,35 +138,35 @@ library ECDSA384 {
      * @dev Check if a point in affine coordinates is on the curve.
      */
     function _isOnCurve(
-        uint256 call_,
-        uint256 p_,
-        uint256 a_,
-        uint256 b_,
-        uint256 x_,
-        uint256 y_
+        call call_,
+        uint512 p_,
+        uint512 a_,
+        uint512 b_,
+        uint512 x_,
+        uint512 y_
     ) private view returns (bool) {
         unchecked {
             if (
-                _U384.eqUint256(x_, 0) ||
-                _U384.eq(x_, p_) ||
-                _U384.eqUint256(y_, 0) ||
-                _U384.eq(y_, p_)
+                U512.eqUint256(x_, 0) ||
+                U512.eq(x_, p_) ||
+                U512.eqUint256(y_, 0) ||
+                U512.eq(y_, p_)
             ) {
                 return false;
             }
 
-            uint256 lhs_ = _U384.modexp(call_, y_, 2);
-            uint256 rhs_ = _U384.modexp(call_, x_, 3);
+            uint512 lhs_ = U512.modexp(call_, y_, U512.fromUint256(2), p_);
+            uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
 
-            if (!_U384.eqUint256(a_, 0)) {
-                rhs_ = _U384.modadd(rhs_, _U384.modmul(call_, x_, a_), p_); // x^3 + a*x
+            if (!U512.eqUint256(a_, 0)) {
+                rhs_ = U512.redadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
             }
 
-            if (!_U384.eqUint256(b_, 0)) {
-                rhs_ = _U384.modadd(rhs_, b_, p_); // x^3 + a*x + b
+            if (!U512.eqUint256(b_, 0)) {
+                rhs_ = U512.redadd(call_, rhs_, b_, p_); // x^3 + a*x + b
             }
 
-            return _U384.eq(lhs_, rhs_);
+            return U512.eq(lhs_, rhs_);
         }
     }
 
@@ -176,14 +174,15 @@ library ECDSA384 {
      * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
      */
     function _doubleScalarMultiplication(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256[2][64] memory points_,
-        uint256 scalar1_,
-        uint256 scalar2_
-    ) private view returns (uint256 x_, uint256 y_) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512[2][64] memory points_,
+        uint512 scalar1_,
+        uint512 scalar2_
+    ) private view returns (uint512 x_, uint512 y_) {
         unchecked {
             uint256 mask_;
             uint256 mask1_;
@@ -195,12 +194,15 @@ library ECDSA384 {
 
                 mask_ = (mask1_ << 3) | mask2_;
 
-                (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
+                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
 
                 if (mask_ != 0) {
                     (x_, y_) = _addAffine(
                         call_,
                         p_,
+                        two_,
                         three_,
                         a_,
                         points_[mask_][0],
@@ -215,7 +217,7 @@ library ECDSA384 {
         }
     }
 
-    function _getWord(uint256 scalar_, uint256 bit_) private pure returns (uint256) {
+    function _getWord(uint512 scalar_, uint256 bit_) private pure returns (uint256) {
         unchecked {
             uint256 word_;
             if (bit_ <= 253) {
@@ -238,111 +240,37 @@ library ECDSA384 {
      * @dev Double an elliptic curve point in affine coordinates.
      */
     function _twiceAffine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_
-    ) private view returns (uint256 x2_, uint256 y2_) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512 x1_,
+        uint512 y1_
+    ) private view returns (uint512 x2_, uint512 y2_) {
         unchecked {
-            if (x1_ == 0) {
-                return (0, 0);
+            if (U512.isNull(x1_)) {
+                return (x2_, y2_);
             }
 
-            if (_U384.eqUint256(y1_, 0)) {
-                return (0, 0);
+            if (U512.eqUint256(y1_, 0)) {
+                return (x2_, y2_);
             }
 
-            uint256 m1_ = _U384.modexp(call_, x1_, 2);
-            _U384.modmulAssign(call_, m1_, three_);
-            _U384.modaddAssign(m1_, a_, p_);
+            uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
+            U512.modmulAssign(call_, m1_, three_, p_);
+            U512.redaddAssign(call_, m1_, a_, p_);
 
-            uint256 m2_ = _U384.modshl1(y1_, p_);
-            _U384.moddivAssign(call_, m1_, m2_);
+            uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
+            U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x2_ = _U384.modexp(call_, m1_, 2);
-            _U384.modsubAssign(x2_, x1_, p_);
-            _U384.modsubAssign(x2_, x1_, p_);
+            x2_ = U512.modexp(call_, m1_, two_, p_);
+            U512.redsubAssign(call_, x2_, x1_, p_);
+            U512.redsubAssign(call_, x2_, x1_, p_);
 
-            y2_ = _U384.modsub(x1_, x2_, p_);
-            _U384.modmulAssign(call_, y2_, m1_);
-            _U384.modsubAssign(y2_, y1_, p_);
-        }
-    }
-
-    /**
-     * @dev Doubles an elliptic curve point 3 times in affine coordinates.
-     */
-    function _twice3Affine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_
-    ) private view returns (uint256 x2_, uint256 y2_) {
-        unchecked {
-            if (x1_ == 0) {
-                return (0, 0);
-            }
-
-            if (_U384.eqUint256(y1_, 0)) {
-                return (0, 0);
-            }
-
-            uint256 m1 = _U384.modexp(call_, x1_, 2);
-            _U384.modmulAssign(call_, m1, three_);
-            _U384.modaddAssign(m1, a_, p_);
-
-            uint256 m2 = _U384.modshl1(y1_, p_);
-            _U384.moddivAssign(call_, m1, m2);
-
-            x2_ = _U384.modexp(call_, m1, 2);
-            _U384.modsubAssign(x2_, x1_, p_);
-            _U384.modsubAssign(x2_, x1_, p_);
-
-            y2_ = _U384.modsub(x1_, x2_, p_);
-            _U384.modmulAssign(call_, y2_, m1);
-            _U384.modsubAssign(y2_, y1_, p_);
-
-            if (_U384.eqUint256(y2_, 0)) {
-                return (0, 0);
-            }
-
-            _U384.modexpAssignTo(call_, m1, x2_, 2);
-            _U384.modmulAssign(call_, m1, three_);
-            _U384.modaddAssign(m1, a_, p_);
-
-            _U384.modshl1AssignTo(m2, y2_, p_);
-            _U384.moddivAssign(call_, m1, m2);
-
-            _U384.modexpAssignTo(call_, x1_, m1, 2);
-            _U384.modsubAssign(x1_, x2_, p_);
-            _U384.modsubAssign(x1_, x2_, p_);
-
-            _U384.modsubAssignTo(y1_, x2_, x1_, p_);
-            _U384.modmulAssign(call_, y1_, m1);
-            _U384.modsubAssign(y1_, y2_, p_);
-
-            if (_U384.eqUint256(y1_, 0)) {
-                return (0, 0);
-            }
-
-            _U384.modexpAssignTo(call_, m1, x1_, 2);
-            _U384.modmulAssign(call_, m1, three_);
-            _U384.modaddAssign(m1, a_, p_);
-
-            _U384.modshl1AssignTo(m2, y1_, p_);
-            _U384.moddivAssign(call_, m1, m2);
-
-            _U384.modexpAssignTo(call_, x2_, m1, 2);
-            _U384.modsubAssign(x2_, x1_, p_);
-            _U384.modsubAssign(x2_, x1_, p_);
-
-            _U384.modsubAssignTo(y2_, x1_, x2_, p_);
-            _U384.modmulAssign(call_, y2_, m1);
-            _U384.modsubAssign(y2_, y1_, p_);
+            y2_ = U512.redsub(call_, x1_, x2_, p_);
+            U512.modmulAssign(call_, y2_, m1_, p_);
+            U512.redsubAssign(call_, y2_, y1_, p_);
         }
     }
 
@@ -350,63 +278,65 @@ library ECDSA384 {
      * @dev Add two elliptic curve points in affine coordinates.
      */
     function _addAffine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_,
-        uint256 x2_,
-        uint256 y2_
-    ) private view returns (uint256 x3, uint256 y3) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512 x1_,
+        uint512 y1_,
+        uint512 x2_,
+        uint512 y2_
+    ) private view returns (uint512 x3, uint512 y3) {
         unchecked {
-            if (x1_ == 0 || x2_ == 0) {
-                if (x1_ == 0 && x2_ == 0) {
-                    return (0, 0);
+            if (U512.isNull(x1_) || U512.isNull(x2_)) {
+                if (U512.isNull(x1_) && U512.isNull(x2_)) {
+                    return (x3, y3);
                 }
 
                 return
-                    x1_ == 0
-                        ? (_U384.copy(x2_), _U384.copy(y2_))
-                        : (_U384.copy(x1_), _U384.copy(y1_));
+                    U512.isNull(x1_)
+                        ? (U512.copy(x2_), U512.copy(y2_))
+                        : (U512.copy(x1_), U512.copy(y1_));
             }
 
-            if (_U384.eq(x1_, x2_)) {
-                if (_U384.eq(y1_, y2_)) {
-                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
+            if (U512.eq(x1_, x2_)) {
+                if (U512.eq(y1_, y2_)) {
+                    return _twiceAffine(call_, p_, two_, three_, a_, x1_, y1_);
                 }
 
-                return (0, 0);
+                return (x3, y3);
             }
 
-            uint256 m1_ = _U384.modsub(y1_, y2_, p_);
-            uint256 m2_ = _U384.modsub(x1_, x2_, p_);
+            uint512 m1_ = U512.redsub(call_, y1_, y2_, p_);
+            uint512 m2_ = U512.redsub(call_, x1_, x2_, p_);
 
-            _U384.moddivAssign(call_, m1_, m2_);
+            U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x3 = _U384.modexp(call_, m1_, 2);
-            _U384.modsubAssign(x3, x1_, p_);
-            _U384.modsubAssign(x3, x2_, p_);
+            x3 = U512.modexp(call_, m1_, two_, p_);
+            U512.redsubAssign(call_, x3, x1_, p_);
+            U512.redsubAssign(call_, x3, x2_, p_);
 
-            y3 = _U384.modsub(x1_, x3, p_);
-            _U384.modmulAssign(call_, y3, m1_);
-            _U384.modsubAssign(y3, y1_, p_);
+            y3 = U512.redsub(call_, x1_, x3, p_);
+            U512.modmulAssign(call_, y3, m1_, p_);
+            U512.redsubAssign(call_, y3, y1_, p_);
         }
     }
 
     function _precomputePointsTable(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 hx_,
-        uint256 hy_,
-        uint256 gx_,
-        uint256 gy_
-    ) private view returns (uint256[2][64] memory points_) {
+        call call_,
+        uint512 p_,
+        uint512 two_,
+        uint512 three_,
+        uint512 a_,
+        uint512 hx_,
+        uint512 hy_,
+        uint512 gx_,
+        uint512 gy_
+    ) private view returns (uint512[2][64] memory points_) {
         unchecked {
-            (points_[0x01][0], points_[0x01][1]) = (_U384.copy(hx_), _U384.copy(hy_));
-            (points_[0x08][0], points_[0x08][1]) = (_U384.copy(gx_), _U384.copy(gy_));
+            (points_[0x01][0], points_[0x01][1]) = (U512.copy(hx_), U512.copy(hy_));
+            (points_[0x08][0], points_[0x08][1]) = (U512.copy(gx_), U512.copy(gy_));
 
             for (uint256 i = 0; i < 8; ++i) {
                 for (uint256 j = 0; j < 8; ++j) {
@@ -414,31 +344,31 @@ library ECDSA384 {
                         continue;
                     }
 
-                    uint256[2] memory pointTo_ = points_[(i << 3) | j];
+                    uint256 maskTo = (i << 3) | j;
 
                     if (i != 0) {
-                        uint256[2] memory pointFrom_ = points_[((i - 1) << 3) | j];
+                        uint256 maskFrom = ((i - 1) << 3) | j;
 
-                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
                             call_,
                             p_,
+                            two_,
                             three_,
                             a_,
-                            pointFrom_[0],
-                            pointFrom_[1],
+                            points_[maskFrom][0],
+                            points_[maskFrom][1],
                             gx_,
                             gy_
                         );
                     } else {
-                        uint256[2] memory pointFrom_ = points_[(i << 3) | (j - 1)];
-
-                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                        (points_[maskTo][0], points_[maskTo][1]) = _addAffine(
                             call_,
                             p_,
+                            two_,
                             three_,
                             a_,
-                            pointFrom_[0],
-                            pointFrom_[1],
+                            points_[(i << 3) | (j - 1)][0],
+                            points_[(i << 3) | (j - 1)][1],
                             hx_,
                             hy_
                         );
@@ -451,17 +381,19 @@ library ECDSA384 {
     }
 
     /**
-     * @dev Convert 96 bytes to two 384-bit unsigned integers.
+     * @dev Convert 96 bytes to two 512-bit unsigned integers.
      */
-    function _u384FromBytes2(bytes memory bytes_) private view returns (uint256, uint256) {
+    function _u512FromBytes2(bytes memory bytes_) private view returns (uint512, uint512) {
         unchecked {
+            assert(bytes_.length == 96);
+
             bytes memory lhs_ = new bytes(48);
             bytes memory rhs_ = new bytes(48);
 
             MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
             MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48);
 
-            return (_U384.fromBytes(lhs_), _U384.fromBytes(rhs_));
+            return (U512.fromBytes(lhs_), U512.fromBytes(rhs_));
         }
     }
 }
diff --git a/contracts/libs/crypto/backend/U512.sol b/contracts/libs/crypto/backend/U512.sol
deleted file mode 100644
index fe28ad68..00000000
--- a/contracts/libs/crypto/backend/U512.sol
+++ /dev/null
@@ -1,498 +0,0 @@
-// SPDX-License-Identifier: MIT
-pragma solidity ^0.8.4;
-
-/**
- * @notice Low-level utility library that implements unsigned 384-bit arithmetics.
- *
- * Serves for internal purposes only.
- */
-library _U512 {
-    uint256 private constant _UINT384_ALLOCATION = 64;
-    uint256 private constant _CALL_ALLOCATION = 4 * 288;
-    uint256 private constant _MUL_OFFSET = 288;
-    uint256 private constant _EXP_OFFSET = 2 * 288;
-    uint256 private constant _INV_OFFSET = 3 * 288;
-
-    function initCall(uint256 m_) internal pure returns (uint256 handler_) {
-        unchecked {
-            handler_ = _allocate(_CALL_ALLOCATION);
-
-            _sub(m_, fromUint256(2), handler_ + _INV_OFFSET + 0xA0);
-
-            assembly {
-                let call_ := add(handler_, _MUL_OFFSET)
-
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, _EXP_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), mload(m_))
-                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, _INV_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-            }
-        }
-    }
-
-    function fromUint256(uint256 u256_) internal pure returns (uint256 handler_) {
-        unchecked {
-            handler_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0x00)
-                mstore(add(handler_, 0x20), u256_)
-            }
-        }
-    }
-
-    function fromBytes(bytes memory bytes_) internal view returns (uint256 handler_) {
-        unchecked {
-            assert(bytes_.length < 49);
-
-            handler_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0)
-                mstore(add(handler_, 0x20), 0)
-
-                let size_ := mload(bytes_)
-                pop(
-                    staticcall(
-                        gas(),
-                        0x4,
-                        add(bytes_, 0x20),
-                        size_,
-                        add(handler_, sub(0x40, size_)),
-                        size_
-                    )
-                )
-            }
-        }
-    }
-
-    function copy(uint256 handler_) internal pure returns (uint256 handlerCopy_) {
-        unchecked {
-            handlerCopy_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                mstore(handlerCopy_, mload(handler_))
-                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20)))
-            }
-
-            return handlerCopy_;
-        }
-    }
-
-    function eq(uint256 a_, uint256 b_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
-        }
-    }
-
-    function eqUint256(uint256 a_, uint256 bInteger_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_))
-        }
-    }
-
-    function cmp(uint256 a_, uint256 b_) internal pure returns (int256) {
-        unchecked {
-            uint256 aWord_;
-            uint256 bWord_;
-
-            assembly {
-                aWord_ := mload(a_)
-                bWord_ := mload(b_)
-            }
-
-            if (aWord_ > bWord_) {
-                return 1;
-            }
-
-            if (aWord_ < bWord_) {
-                return -1;
-            }
-
-            assembly {
-                aWord_ := mload(add(a_, 0x20))
-                bWord_ := mload(add(b_, 0x20))
-            }
-
-            if (aWord_ > bWord_) {
-                return 1;
-            }
-
-            if (aWord_ < bWord_) {
-                return -1;
-            }
-
-            return 0;
-        }
-    }
-
-    function modAssign(uint256 call_, uint256 a_, uint256 m_) internal view {
-        assembly {
-            mstore(call_, 0x40)
-            mstore(add(0x20, call_), 0x20)
-            mstore(add(0x40, call_), 0x40)
-            mstore(add(0x60, call_), mload(a_))
-            mstore(add(0x80, call_), mload(add(a_, 0x20)))
-            mstore(add(0xA0, call_), 0x01)
-            mstore(add(0xC0, call_), mload(m_))
-            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40))
-        }
-    }
-
-    function modexp(
-        uint256 call_,
-        uint256 b_,
-        uint256 eInteger_
-    ) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                call_ := add(call_, _EXP_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xA0, call_), eInteger_)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
-            }
-
-            return r_;
-        }
-    }
-
-    function modexpAssignTo(
-        uint256 call_,
-        uint256 to_,
-        uint256 b_,
-        uint256 eInteger_
-    ) internal view {
-        assembly {
-            call_ := add(call_, _EXP_OFFSET)
-
-            mstore(add(0x60, call_), mload(b_))
-            mstore(add(0x80, call_), mload(add(b_, 0x20)))
-            mstore(add(0xA0, call_), eInteger_)
-
-            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40))
-        }
-    }
-
-    function modadd(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _add(a_, b_, r_);
-
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
-
-            return r_;
-        }
-    }
-
-    function modaddAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            _addTo(a_, b_);
-
-            if (cmp(a_, m_) >= 0) {
-                return _subFrom(a_, m_);
-            }
-        }
-    }
-
-    function modmul(uint256 call_, uint256 a_, uint256 b_) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60);
-
-            assembly {
-                call_ := add(call_, _MUL_OFFSET)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-
-            return r_;
-        }
-    }
-
-    function modmulAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
-        unchecked {
-            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60);
-
-            assembly {
-                call_ := add(call_, _MUL_OFFSET)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40))
-            }
-        }
-    }
-
-    function modsub(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, r_);
-                return r_;
-            }
-
-            _add(a_, m_, r_);
-            _subFrom(r_, b_);
-        }
-    }
-
-    function modsubAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _subFrom(a_, b_);
-                return;
-            }
-
-            _addTo(a_, m_);
-            _subFrom(a_, b_);
-        }
-    }
-
-    function modsubAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, to_);
-                return;
-            }
-
-            _add(a_, m_, to_);
-            _subFrom(to_, b_);
-        }
-    }
-
-    function modshl1(uint256 a_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _shl1(a_, r_);
-
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
-
-            return r_;
-        }
-    }
-
-    function modshl1AssignTo(uint256 to_, uint256 a_, uint256 m_) internal pure {
-        unchecked {
-            _shl1(a_, to_);
-
-            if (cmp(to_, m_) >= 0) {
-                _subFrom(to_, m_);
-            }
-        }
-    }
-
-    /// @dev Stores modinv into `b_` and moddiv into `a_`.
-    function moddivAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
-        unchecked {
-            assembly {
-                call_ := add(call_, _INV_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40))
-            }
-
-            modmulAssign(call_ - _INV_OFFSET, a_, b_);
-        }
-    }
-
-    function moddiv(
-        uint256 call_,
-        uint256 a_,
-        uint256 b_,
-        uint256 m_
-    ) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = modinv(call_, b_, m_);
-
-            _mul(a_, r_, call_ + 0x60);
-
-            assembly {
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-        }
-    }
-
-    function modinv(uint256 call_, uint256 b_, uint256 m_) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _sub(m_, fromUint256(2), call_ + 0xA0);
-
-            assembly {
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-        }
-    }
-
-    function _shl1(uint256 a_, uint256 r_) internal pure {
-        assembly {
-            let a1_ := mload(add(a_, 0x20))
-
-            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
-            mstore(add(r_, 0x20), shl(1, a1_))
-        }
-    }
-
-    function _add(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(r_, 0x20), sum_)
-
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
-
-            mstore(r_, sum_)
-        }
-    }
-
-    function _sub(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(r_, 0x20), diff_)
-
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
-
-            mstore(r_, diff_)
-        }
-    }
-
-    function _subFrom(uint256 a_, uint256 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(a_, 0x20), diff_)
-
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
-
-            mstore(a_, diff_)
-        }
-    }
-
-    function _addTo(uint256 a_, uint256 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(a_, 0x20), sum_)
-
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
-
-            mstore(a_, sum_)
-        }
-    }
-
-    function _mul(uint256 a_, uint256 b_, uint256 r_) private pure {
-        unchecked {
-            assembly {
-                let a0_ := mload(a_)
-                let a1_ := mload(add(a_, 0x20))
-                let b0_ := mload(b_)
-                let b1_ := mload(add(b_, 0x20))
-
-                let mm_ := mulmod(
-                    a1_,
-                    b1_,
-                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-                )
-                let c3_ := mul(a1_, b1_)
-                let c2_ := sub(sub(mm_, c3_), lt(mm_, c3_))
-
-                mm_ := mulmod(
-                    a0_,
-                    b1_,
-                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-                )
-                let prod1_ := mul(a0_, b1_)
-                let prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
-
-                c2_ := add(c2_, prod1_)
-                let c1_ := lt(c2_, prod1_)
-                c1_ := add(c1_, prod0_)
-
-                mm_ := mulmod(
-                    a1_,
-                    b0_,
-                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-                )
-                prod1_ := mul(a1_, b0_)
-                prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
-
-                c2_ := add(c2_, prod1_)
-                c1_ := add(c1_, lt(c2_, prod1_))
-                c1_ := add(c1_, prod0_)
-                c1_ := add(c1_, mul(a0_, b0_))
-
-                mstore(add(r_, 0x40), c3_)
-                mstore(add(r_, 0x20), c2_)
-                mstore(r_, c1_)
-            }
-        }
-    }
-
-    function _allocate(uint256 bytes_) private pure returns (uint256 handler_) {
-        unchecked {
-            assembly {
-                handler_ := mload(0x40)
-                mstore(0x40, add(handler_, bytes_))
-            }
-
-            return handler_;
-        }
-    }
-}
diff --git a/contracts/libs/crypto/opt/ECDSA384.sol b/contracts/libs/crypto/opt/ECDSA384.sol
new file mode 100644
index 00000000..e8b5cd37
--- /dev/null
+++ b/contracts/libs/crypto/opt/ECDSA384.sol
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: MIT
+pragma solidity ^0.8.4;
+
+import {MemoryUtils} from "../../utils/MemoryUtils.sol";
+import {_U384} from "./U384.sol";
+
+/**
+ * @notice Cryptography module
+ *
+ * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
+ * this is the most efficient implementation out there, consuming ~7.767 million gas per call.
+ *
+ * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
+ * For reference, naive implementation uses ~400 billion gas, which is ~50000 times more expensive.
+ *
+ * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
+ */
+library ECDSA384 {
+    using MemoryUtils for *;
+
+    /**
+     * @notice 384-bit curve parameters.
+     */
+    struct Parameters {
+        bytes a;
+        bytes b;
+        bytes gx;
+        bytes gy;
+        bytes p;
+        bytes n;
+        bytes lowSmax;
+    }
+
+    struct _Parameters {
+        uint256 a;
+        uint256 b;
+        uint256 gx;
+        uint256 gy;
+        uint256 p;
+        uint256 n;
+        uint256 lowSmax;
+    }
+
+    struct _Inputs {
+        uint256 r;
+        uint256 s;
+        uint256 x;
+        uint256 y;
+    }
+
+    /**
+     * @notice The function to verify the ECDSA signature
+     * @param curveParams_ the 384-bit curve parameters. `lowSmax` is `n / 2`.
+     * @param hashedMessage_ the already hashed message to be verified.
+     * @param signature_ the ECDSA signature. Equals to `bytes(r) + bytes(s)`.
+     * @param pubKey_ the full public key of a signer. Equals to `bytes(x) + bytes(y)`.
+     *
+     * Note that signatures only from the lower part of the curve are accepted.
+     * If your `s > n / 2`, change it to `s = n - s`.
+     */
+    function verify(
+        Parameters memory curveParams_,
+        bytes memory hashedMessage_,
+        bytes memory signature_,
+        bytes memory pubKey_
+    ) internal view returns (bool) {
+        unchecked {
+            _Inputs memory inputs_;
+
+            (inputs_.r, inputs_.s) = _u384FromBytes2(signature_);
+            (inputs_.x, inputs_.y) = _u384FromBytes2(pubKey_);
+
+            _Parameters memory params_ = _Parameters({
+                a: _U384.fromBytes(curveParams_.a),
+                b: _U384.fromBytes(curveParams_.b),
+                gx: _U384.fromBytes(curveParams_.gx),
+                gy: _U384.fromBytes(curveParams_.gy),
+                p: _U384.fromBytes(curveParams_.p),
+                n: _U384.fromBytes(curveParams_.n),
+                lowSmax: _U384.fromBytes(curveParams_.lowSmax)
+            });
+
+            uint256 call_ = _U384.initCall(params_.p);
+
+            /// accept s only from the lower part of the curve
+            if (
+                _U384.eqUint256(inputs_.r, 0) ||
+                _U384.cmp(inputs_.r, params_.n) >= 0 ||
+                _U384.eqUint256(inputs_.s, 0) ||
+                _U384.cmp(inputs_.s, params_.lowSmax) > 0
+            ) {
+                return false;
+            }
+
+            if (!_isOnCurve(call_, params_.p, params_.a, params_.b, inputs_.x, inputs_.y)) {
+                return false;
+            }
+
+            uint256 scalar1_ = _U384.moddiv(
+                call_,
+                _U384.fromBytes(hashedMessage_),
+                inputs_.s,
+                params_.n
+            );
+            uint256 scalar2_ = _U384.moddiv(call_, inputs_.r, inputs_.s, params_.n);
+
+            {
+                uint256 three_ = _U384.fromUint256(3);
+
+                /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
+                uint256[2][64] memory points_ = _precomputePointsTable(
+                    call_,
+                    params_.p,
+                    three_,
+                    params_.a,
+                    inputs_.x,
+                    inputs_.y,
+                    params_.gx,
+                    params_.gy
+                );
+
+                (scalar1_, ) = _doubleScalarMultiplication(
+                    call_,
+                    params_.p,
+                    three_,
+                    params_.a,
+                    points_,
+                    scalar1_,
+                    scalar2_
+                );
+            }
+
+            _U384.modAssign(call_, scalar1_, params_.n);
+
+            return _U384.eq(scalar1_, inputs_.r);
+        }
+    }
+
+    /**
+     * @dev Check if a point in affine coordinates is on the curve.
+     */
+    function _isOnCurve(
+        uint256 call_,
+        uint256 p_,
+        uint256 a_,
+        uint256 b_,
+        uint256 x_,
+        uint256 y_
+    ) private view returns (bool) {
+        unchecked {
+            if (
+                _U384.eqUint256(x_, 0) ||
+                _U384.eq(x_, p_) ||
+                _U384.eqUint256(y_, 0) ||
+                _U384.eq(y_, p_)
+            ) {
+                return false;
+            }
+
+            uint256 lhs_ = _U384.modexp(call_, y_, 2);
+            uint256 rhs_ = _U384.modexp(call_, x_, 3);
+
+            if (!_U384.eqUint256(a_, 0)) {
+                rhs_ = _U384.modadd(rhs_, _U384.modmul(call_, x_, a_), p_); // x^3 + a*x
+            }
+
+            if (!_U384.eqUint256(b_, 0)) {
+                rhs_ = _U384.modadd(rhs_, b_, p_); // x^3 + a*x + b
+            }
+
+            return _U384.eq(lhs_, rhs_);
+        }
+    }
+
+    /**
+     * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
+     */
+    function _doubleScalarMultiplication(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256[2][64] memory points_,
+        uint256 scalar1_,
+        uint256 scalar2_
+    ) private view returns (uint256 x_, uint256 y_) {
+        unchecked {
+            uint256 mask_;
+            uint256 mask1_;
+            uint256 mask2_;
+
+            for (uint256 bit = 3; bit <= 384; bit += 3) {
+                mask1_ = _getWord(scalar1_, 384 - bit);
+                mask2_ = _getWord(scalar2_, 384 - bit);
+
+                mask_ = (mask1_ << 3) | mask2_;
+
+                (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
+
+                if (mask_ != 0) {
+                    (x_, y_) = _addAffine(
+                        call_,
+                        p_,
+                        three_,
+                        a_,
+                        points_[mask_][0],
+                        points_[mask_][1],
+                        x_,
+                        y_
+                    );
+                }
+            }
+
+            return (x_, y_);
+        }
+    }
+
+    function _getWord(uint256 scalar_, uint256 bit_) private pure returns (uint256) {
+        unchecked {
+            uint256 word_;
+            if (bit_ <= 253) {
+                assembly {
+                    word_ := mload(add(scalar_, 0x20))
+                }
+
+                return (word_ >> bit_) & 0x07;
+            }
+
+            assembly {
+                word_ := mload(add(scalar_, 0x10))
+            }
+
+            return (word_ >> (bit_ - 128)) & 0x07;
+        }
+    }
+
+    /**
+     * @dev Double an elliptic curve point in affine coordinates.
+     */
+    function _twiceAffine(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_
+    ) private view returns (uint256 x2_, uint256 y2_) {
+        unchecked {
+            if (x1_ == 0) {
+                return (0, 0);
+            }
+
+            if (_U384.eqUint256(y1_, 0)) {
+                return (0, 0);
+            }
+
+            uint256 m1_ = _U384.modexp(call_, x1_, 2);
+            _U384.modmulAssign(call_, m1_, three_);
+            _U384.modaddAssign(m1_, a_, p_);
+
+            uint256 m2_ = _U384.modshl1(y1_, p_);
+            _U384.moddivAssign(call_, m1_, m2_);
+
+            x2_ = _U384.modexp(call_, m1_, 2);
+            _U384.modsubAssign(x2_, x1_, p_);
+            _U384.modsubAssign(x2_, x1_, p_);
+
+            y2_ = _U384.modsub(x1_, x2_, p_);
+            _U384.modmulAssign(call_, y2_, m1_);
+            _U384.modsubAssign(y2_, y1_, p_);
+        }
+    }
+
+    /**
+     * @dev Doubles an elliptic curve point 3 times in affine coordinates.
+     */
+    function _twice3Affine(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_
+    ) private view returns (uint256 x2_, uint256 y2_) {
+        unchecked {
+            if (x1_ == 0) {
+                return (0, 0);
+            }
+
+            if (_U384.eqUint256(y1_, 0)) {
+                return (0, 0);
+            }
+
+            uint256 m1 = _U384.modexp(call_, x1_, 2);
+            _U384.modmulAssign(call_, m1, three_);
+            _U384.modaddAssign(m1, a_, p_);
+
+            uint256 m2 = _U384.modshl1(y1_, p_);
+            _U384.moddivAssign(call_, m1, m2);
+
+            x2_ = _U384.modexp(call_, m1, 2);
+            _U384.modsubAssign(x2_, x1_, p_);
+            _U384.modsubAssign(x2_, x1_, p_);
+
+            y2_ = _U384.modsub(x1_, x2_, p_);
+            _U384.modmulAssign(call_, y2_, m1);
+            _U384.modsubAssign(y2_, y1_, p_);
+
+            if (_U384.eqUint256(y2_, 0)) {
+                return (0, 0);
+            }
+
+            _U384.modexpAssignTo(call_, m1, x2_, 2);
+            _U384.modmulAssign(call_, m1, three_);
+            _U384.modaddAssign(m1, a_, p_);
+
+            _U384.modshl1AssignTo(m2, y2_, p_);
+            _U384.moddivAssign(call_, m1, m2);
+
+            _U384.modexpAssignTo(call_, x1_, m1, 2);
+            _U384.modsubAssign(x1_, x2_, p_);
+            _U384.modsubAssign(x1_, x2_, p_);
+
+            _U384.modsubAssignTo(y1_, x2_, x1_, p_);
+            _U384.modmulAssign(call_, y1_, m1);
+            _U384.modsubAssign(y1_, y2_, p_);
+
+            if (_U384.eqUint256(y1_, 0)) {
+                return (0, 0);
+            }
+
+            _U384.modexpAssignTo(call_, m1, x1_, 2);
+            _U384.modmulAssign(call_, m1, three_);
+            _U384.modaddAssign(m1, a_, p_);
+
+            _U384.modshl1AssignTo(m2, y1_, p_);
+            _U384.moddivAssign(call_, m1, m2);
+
+            _U384.modexpAssignTo(call_, x2_, m1, 2);
+            _U384.modsubAssign(x2_, x1_, p_);
+            _U384.modsubAssign(x2_, x1_, p_);
+
+            _U384.modsubAssignTo(y2_, x1_, x2_, p_);
+            _U384.modmulAssign(call_, y2_, m1);
+            _U384.modsubAssign(y2_, y1_, p_);
+        }
+    }
+
+    /**
+     * @dev Add two elliptic curve points in affine coordinates.
+     */
+    function _addAffine(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 x1_,
+        uint256 y1_,
+        uint256 x2_,
+        uint256 y2_
+    ) private view returns (uint256 x3, uint256 y3) {
+        unchecked {
+            if (x1_ == 0 || x2_ == 0) {
+                if (x1_ == 0 && x2_ == 0) {
+                    return (0, 0);
+                }
+
+                return
+                    x1_ == 0
+                        ? (_U384.copy(x2_), _U384.copy(y2_))
+                        : (_U384.copy(x1_), _U384.copy(y1_));
+            }
+
+            if (_U384.eq(x1_, x2_)) {
+                if (_U384.eq(y1_, y2_)) {
+                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
+                }
+
+                return (0, 0);
+            }
+
+            uint256 m1_ = _U384.modsub(y1_, y2_, p_);
+            uint256 m2_ = _U384.modsub(x1_, x2_, p_);
+
+            _U384.moddivAssign(call_, m1_, m2_);
+
+            x3 = _U384.modexp(call_, m1_, 2);
+            _U384.modsubAssign(x3, x1_, p_);
+            _U384.modsubAssign(x3, x2_, p_);
+
+            y3 = _U384.modsub(x1_, x3, p_);
+            _U384.modmulAssign(call_, y3, m1_);
+            _U384.modsubAssign(y3, y1_, p_);
+        }
+    }
+
+    function _precomputePointsTable(
+        uint256 call_,
+        uint256 p_,
+        uint256 three_,
+        uint256 a_,
+        uint256 hx_,
+        uint256 hy_,
+        uint256 gx_,
+        uint256 gy_
+    ) private view returns (uint256[2][64] memory points_) {
+        unchecked {
+            (points_[0x01][0], points_[0x01][1]) = (_U384.copy(hx_), _U384.copy(hy_));
+            (points_[0x08][0], points_[0x08][1]) = (_U384.copy(gx_), _U384.copy(gy_));
+
+            for (uint256 i = 0; i < 8; ++i) {
+                for (uint256 j = 0; j < 8; ++j) {
+                    if (i + j < 2) {
+                        continue;
+                    }
+
+                    uint256[2] memory pointTo_ = points_[(i << 3) | j];
+
+                    if (i != 0) {
+                        uint256[2] memory pointFrom_ = points_[((i - 1) << 3) | j];
+
+                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                            call_,
+                            p_,
+                            three_,
+                            a_,
+                            pointFrom_[0],
+                            pointFrom_[1],
+                            gx_,
+                            gy_
+                        );
+                    } else {
+                        uint256[2] memory pointFrom_ = points_[(i << 3) | (j - 1)];
+
+                        (pointTo_[0], pointTo_[1]) = _addAffine(
+                            call_,
+                            p_,
+                            three_,
+                            a_,
+                            pointFrom_[0],
+                            pointFrom_[1],
+                            hx_,
+                            hy_
+                        );
+                    }
+                }
+            }
+
+            return points_;
+        }
+    }
+
+    /**
+     * @dev Convert 96 bytes to two 384-bit unsigned integers.
+     */
+    function _u384FromBytes2(bytes memory bytes_) private view returns (uint256, uint256) {
+        unchecked {
+            bytes memory lhs_ = new bytes(48);
+            bytes memory rhs_ = new bytes(48);
+
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
+            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48);
+
+            return (_U384.fromBytes(lhs_), _U384.fromBytes(rhs_));
+        }
+    }
+}
diff --git a/contracts/libs/crypto/backend/U384.sol b/contracts/libs/crypto/opt/U384.sol
similarity index 100%
rename from contracts/libs/crypto/backend/U384.sol
rename to contracts/libs/crypto/opt/U384.sol
diff --git a/contracts/mock/libs/crypto/ECDSA384Mock.sol b/contracts/mock/libs/crypto/ECDSA384Mock.sol
index eea369fd..8786b97d 100644
--- a/contracts/mock/libs/crypto/ECDSA384Mock.sol
+++ b/contracts/mock/libs/crypto/ECDSA384Mock.sol
@@ -2,7 +2,8 @@
 pragma solidity ^0.8.4;
 
 import {ECDSA384} from "../../../libs/crypto/ECDSA384.sol";
-import {_U384} from "../../../libs/crypto/backend/U384.sol";
+import {uint512} from "../../../libs/bn/U512.sol";
+import {U512} from "../../../libs/bn/U512.sol";
 
 contract ECDSA384Mock {
     using ECDSA384 for *;
@@ -74,8 +75,8 @@ contract ECDSA384Mock {
     }
 
     function cmpMock() external pure returns (int256 cmp_) {
-        uint256 a_;
-        uint256 b_;
+        uint512 a_;
+        uint512 b_;
 
         assembly {
             a_ := mload(0x40)
@@ -87,6 +88,6 @@ contract ECDSA384Mock {
             mstore(0x40, add(b_, 0x40))
         }
 
-        return _U384.cmp(a_, b_);
+        return U512.cmp(a_, b_);
     }
 }
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index de29aaac..9294b951 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -168,7 +168,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe("brainpoolP384r1", () => {
+  describe.only("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =

From 57f0f8d5358f3ff559bbe16d55cabde0e24d3edf Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:25:37 +0200
Subject: [PATCH 21/42] typo

---
 contracts/libs/crypto/ECDSA384.sol | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 944b521f..cbac132a 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -9,7 +9,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~20.2 million gas per call.
+ * this is the most efficient implementation out there, consuming ~14.1 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
  */

From 6db51413b5d8443597288bc6a03903dd87c4adc4 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:44:46 +0200
Subject: [PATCH 22/42] ECDSA512: add bit-skipping optimization (~20.1M gas)

---
 contracts/libs/crypto/ECDSA512.sol | 44 ++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index 584e6cde..53bc1d86 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -188,24 +188,40 @@ library ECDSA512 {
             uint256 mask1_;
             uint256 mask2_;
 
-            // skip first two bits
-            assembly {
-                mask1_ := shr(254, mload(scalar1_))
-                mask2_ := shr(254, mload(scalar2_))
-                mask_ := or(shl(3, mask1_), mask2_)
-            }
+            for (uint256 bit = 3; bit <= 514; ) {
+                if (bit <= 512) {
+                    mask1_ = _getWord(scalar1_, 512 - bit);
+                    mask2_ = _getWord(scalar2_, 512 - bit);
+
+                    if ((mask1_ >> 2) == 0 && (mask2_ >> 2) == 0) {
+                        (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                        ++bit;
+                        continue;
+                    }
 
-            (x_, y_) = (U512.copy(points_[mask_][0]), U512.copy(points_[mask_][1]));
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
 
-            for (uint256 bit = 5; bit <= 512; bit += 3) {
-                mask1_ = _getWord(scalar1_, 512 - bit);
-                mask2_ = _getWord(scalar2_, 512 - bit);
+                    bit += 3;
+                } else if (bit == 513) {
+                    mask1_ = _getWord(scalar1_, 0) & 0x03;
+                    mask2_ = _getWord(scalar2_, 0) & 0x03;
 
-                mask_ = (mask1_ << 3) | mask2_;
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
 
-                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    bit += 2;
+                } else {
+                    mask1_ = _getWord(scalar1_, 0) & 0x01;
+                    mask2_ = _getWord(scalar2_, 0) & 0x01;
+
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+
+                    ++bit;
+                }
+
+                mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
                     (x_, y_) = _addAffine(

From b9462a75ce95af0b8f6445d790a0a219d53ffd02 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 21 Jan 2025 15:56:25 +0200
Subject: [PATCH 23/42] ECDSA384: add bit-skipping optimization (~13.86M gas); update ECDSA512 gas figure

---
 contracts/libs/crypto/ECDSA384.sol | 41 ++++++++++++++++++++++++------
 contracts/libs/crypto/ECDSA512.sol |  2 +-
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index cbac132a..8e4e9992 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -9,7 +9,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~14.1 million gas per call.
+ * this is the most efficient implementation out there, consuming ~13.86 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
  */
@@ -188,15 +188,40 @@ library ECDSA384 {
             uint256 mask1_;
             uint256 mask2_;
 
-            for (uint256 bit = 3; bit <= 384; bit += 3) {
-                mask1_ = _getWord(scalar1_, 384 - bit);
-                mask2_ = _getWord(scalar2_, 384 - bit);
+            for (uint256 bit = 3; bit <= 386; ) {
+                if (bit <= 384) {
+                    mask1_ = _getWord(scalar1_, 384 - bit);
+                    mask2_ = _getWord(scalar2_, 384 - bit);
 
-                mask_ = (mask1_ << 3) | mask2_;
+                    if ((mask1_ >> 2) == 0 && (mask2_ >> 2) == 0) {
+                        (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                        ++bit;
+                        continue;
+                    }
+
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+
+                    bit += 3;
+                } else if (bit == 385) {
+                    mask1_ = _getWord(scalar1_, 0) & 0x03;
+                    mask2_ = _getWord(scalar2_, 0) & 0x03;
+
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
 
-                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
-                (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+                    bit += 2;
+                } else {
+                    mask1_ = _getWord(scalar1_, 0) & 0x01;
+                    mask2_ = _getWord(scalar2_, 0) & 0x01;
+
+                    (x_, y_) = _twiceAffine(call_, p_, two_, three_, a_, x_, y_);
+
+                    ++bit;
+                }
+
+                mask_ = (mask1_ << 3) | mask2_;
 
                 if (mask_ != 0) {
                     (x_, y_) = _addAffine(
diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index 53bc1d86..ba6d2bc9 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -9,7 +9,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 512-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~20.2 million gas per call.
+ * this is the most efficient implementation out there, consuming ~20.1 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
  */

From 288f0c1c5420ad0c2eaaf07f2ef066da93e5a8b3 Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Thu, 23 Jan 2025 13:43:09 +0200
Subject: [PATCH 24/42] remove opt 384 libs

---
 contracts/libs/crypto/opt/ECDSA384.sol | 467 -----------------------
 contracts/libs/crypto/opt/U384.sol     | 498 -------------------------
 2 files changed, 965 deletions(-)
 delete mode 100644 contracts/libs/crypto/opt/ECDSA384.sol
 delete mode 100644 contracts/libs/crypto/opt/U384.sol

diff --git a/contracts/libs/crypto/opt/ECDSA384.sol b/contracts/libs/crypto/opt/ECDSA384.sol
deleted file mode 100644
index e8b5cd37..00000000
--- a/contracts/libs/crypto/opt/ECDSA384.sol
+++ /dev/null
@@ -1,467 +0,0 @@
-// SPDX-License-Identifier: MIT
-pragma solidity ^0.8.4;
-
-import {MemoryUtils} from "../../utils/MemoryUtils.sol";
-import {_U384} from "./U384.sol";
-
-/**
- * @notice Cryptography module
- *
- * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~7.767 million gas per call.
- *
- * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
- * For reference, naive implementation uses ~400 billion gas, which is ~50000 times more expensive.
- *
- * We also tried using projective coordinates, however, the gas consumption rose to ~9 million gas.
- */
-library ECDSA384 {
-    using MemoryUtils for *;
-
-    /**
-     * @notice 384-bit curve parameters.
-     */
-    struct Parameters {
-        bytes a;
-        bytes b;
-        bytes gx;
-        bytes gy;
-        bytes p;
-        bytes n;
-        bytes lowSmax;
-    }
-
-    struct _Parameters {
-        uint256 a;
-        uint256 b;
-        uint256 gx;
-        uint256 gy;
-        uint256 p;
-        uint256 n;
-        uint256 lowSmax;
-    }
-
-    struct _Inputs {
-        uint256 r;
-        uint256 s;
-        uint256 x;
-        uint256 y;
-    }
-
-    /**
-     * @notice The function to verify the ECDSA signature
-     * @param curveParams_ the 384-bit curve parameters. `lowSmax` is `n / 2`.
-     * @param hashedMessage_ the already hashed message to be verified.
-     * @param signature_ the ECDSA signature. Equals to `bytes(r) + bytes(s)`.
-     * @param pubKey_ the full public key of a signer. Equals to `bytes(x) + bytes(y)`.
-     *
-     * Note that signatures only from the lower part of the curve are accepted.
-     * If your `s > n / 2`, change it to `s = n - s`.
-     */
-    function verify(
-        Parameters memory curveParams_,
-        bytes memory hashedMessage_,
-        bytes memory signature_,
-        bytes memory pubKey_
-    ) internal view returns (bool) {
-        unchecked {
-            _Inputs memory inputs_;
-
-            (inputs_.r, inputs_.s) = _u384FromBytes2(signature_);
-            (inputs_.x, inputs_.y) = _u384FromBytes2(pubKey_);
-
-            _Parameters memory params_ = _Parameters({
-                a: _U384.fromBytes(curveParams_.a),
-                b: _U384.fromBytes(curveParams_.b),
-                gx: _U384.fromBytes(curveParams_.gx),
-                gy: _U384.fromBytes(curveParams_.gy),
-                p: _U384.fromBytes(curveParams_.p),
-                n: _U384.fromBytes(curveParams_.n),
-                lowSmax: _U384.fromBytes(curveParams_.lowSmax)
-            });
-
-            uint256 call_ = _U384.initCall(params_.p);
-
-            /// accept s only from the lower part of the curve
-            if (
-                _U384.eqUint256(inputs_.r, 0) ||
-                _U384.cmp(inputs_.r, params_.n) >= 0 ||
-                _U384.eqUint256(inputs_.s, 0) ||
-                _U384.cmp(inputs_.s, params_.lowSmax) > 0
-            ) {
-                return false;
-            }
-
-            if (!_isOnCurve(call_, params_.p, params_.a, params_.b, inputs_.x, inputs_.y)) {
-                return false;
-            }
-
-            uint256 scalar1_ = _U384.moddiv(
-                call_,
-                _U384.fromBytes(hashedMessage_),
-                inputs_.s,
-                params_.n
-            );
-            uint256 scalar2_ = _U384.moddiv(call_, inputs_.r, inputs_.s, params_.n);
-
-            {
-                uint256 three_ = _U384.fromUint256(3);
-
-                /// We use 6-bit masks where the first 3 bits refer to `scalar1` and the last 3 bits refer to `scalar2`.
-                uint256[2][64] memory points_ = _precomputePointsTable(
-                    call_,
-                    params_.p,
-                    three_,
-                    params_.a,
-                    inputs_.x,
-                    inputs_.y,
-                    params_.gx,
-                    params_.gy
-                );
-
-                (scalar1_, ) = _doubleScalarMultiplication(
-                    call_,
-                    params_.p,
-                    three_,
-                    params_.a,
-                    points_,
-                    scalar1_,
-                    scalar2_
-                );
-            }
-
-            _U384.modAssign(call_, scalar1_, params_.n);
-
-            return _U384.eq(scalar1_, inputs_.r);
-        }
-    }
-
-    /**
-     * @dev Check if a point in affine coordinates is on the curve.
-     */
-    function _isOnCurve(
-        uint256 call_,
-        uint256 p_,
-        uint256 a_,
-        uint256 b_,
-        uint256 x_,
-        uint256 y_
-    ) private view returns (bool) {
-        unchecked {
-            if (
-                _U384.eqUint256(x_, 0) ||
-                _U384.eq(x_, p_) ||
-                _U384.eqUint256(y_, 0) ||
-                _U384.eq(y_, p_)
-            ) {
-                return false;
-            }
-
-            uint256 lhs_ = _U384.modexp(call_, y_, 2);
-            uint256 rhs_ = _U384.modexp(call_, x_, 3);
-
-            if (!_U384.eqUint256(a_, 0)) {
-                rhs_ = _U384.modadd(rhs_, _U384.modmul(call_, x_, a_), p_); // x^3 + a*x
-            }
-
-            if (!_U384.eqUint256(b_, 0)) {
-                rhs_ = _U384.modadd(rhs_, b_, p_); // x^3 + a*x + b
-            }
-
-            return _U384.eq(lhs_, rhs_);
-        }
-    }
-
-    /**
-     * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
-     */
-    function _doubleScalarMultiplication(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256[2][64] memory points_,
-        uint256 scalar1_,
-        uint256 scalar2_
-    ) private view returns (uint256 x_, uint256 y_) {
-        unchecked {
-            uint256 mask_;
-            uint256 mask1_;
-            uint256 mask2_;
-
-            for (uint256 bit = 3; bit <= 384; bit += 3) {
-                mask1_ = _getWord(scalar1_, 384 - bit);
-                mask2_ = _getWord(scalar2_, 384 - bit);
-
-                mask_ = (mask1_ << 3) | mask2_;
-
-                (x_, y_) = _twice3Affine(call_, p_, three_, a_, x_, y_);
-
-                if (mask_ != 0) {
-                    (x_, y_) = _addAffine(
-                        call_,
-                        p_,
-                        three_,
-                        a_,
-                        points_[mask_][0],
-                        points_[mask_][1],
-                        x_,
-                        y_
-                    );
-                }
-            }
-
-            return (x_, y_);
-        }
-    }
-
-    function _getWord(uint256 scalar_, uint256 bit_) private pure returns (uint256) {
-        unchecked {
-            uint256 word_;
-            if (bit_ <= 253) {
-                assembly {
-                    word_ := mload(add(scalar_, 0x20))
-                }
-
-                return (word_ >> bit_) & 0x07;
-            }
-
-            assembly {
-                word_ := mload(add(scalar_, 0x10))
-            }
-
-            return (word_ >> (bit_ - 128)) & 0x07;
-        }
-    }
-
-    /**
-     * @dev Double an elliptic curve point in affine coordinates.
-     */
-    function _twiceAffine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_
-    ) private view returns (uint256 x2_, uint256 y2_) {
-        unchecked {
-            if (x1_ == 0) {
-                return (0, 0);
-            }
-
-            if (_U384.eqUint256(y1_, 0)) {
-                return (0, 0);
-            }
-
-            uint256 m1_ = _U384.modexp(call_, x1_, 2);
-            _U384.modmulAssign(call_, m1_, three_);
-            _U384.modaddAssign(m1_, a_, p_);
-
-            uint256 m2_ = _U384.modshl1(y1_, p_);
-            _U384.moddivAssign(call_, m1_, m2_);
-
-            x2_ = _U384.modexp(call_, m1_, 2);
-            _U384.modsubAssign(x2_, x1_, p_);
-            _U384.modsubAssign(x2_, x1_, p_);
-
-            y2_ = _U384.modsub(x1_, x2_, p_);
-            _U384.modmulAssign(call_, y2_, m1_);
-            _U384.modsubAssign(y2_, y1_, p_);
-        }
-    }
-
-    /**
-     * @dev Doubles an elliptic curve point 3 times in affine coordinates.
-     */
-    function _twice3Affine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_
-    ) private view returns (uint256 x2_, uint256 y2_) {
-        unchecked {
-            if (x1_ == 0) {
-                return (0, 0);
-            }
-
-            if (_U384.eqUint256(y1_, 0)) {
-                return (0, 0);
-            }
-
-            uint256 m1 = _U384.modexp(call_, x1_, 2);
-            _U384.modmulAssign(call_, m1, three_);
-            _U384.modaddAssign(m1, a_, p_);
-
-            uint256 m2 = _U384.modshl1(y1_, p_);
-            _U384.moddivAssign(call_, m1, m2);
-
-            x2_ = _U384.modexp(call_, m1, 2);
-            _U384.modsubAssign(x2_, x1_, p_);
-            _U384.modsubAssign(x2_, x1_, p_);
-
-            y2_ = _U384.modsub(x1_, x2_, p_);
-            _U384.modmulAssign(call_, y2_, m1);
-            _U384.modsubAssign(y2_, y1_, p_);
-
-            if (_U384.eqUint256(y2_, 0)) {
-                return (0, 0);
-            }
-
-            _U384.modexpAssignTo(call_, m1, x2_, 2);
-            _U384.modmulAssign(call_, m1, three_);
-            _U384.modaddAssign(m1, a_, p_);
-
-            _U384.modshl1AssignTo(m2, y2_, p_);
-            _U384.moddivAssign(call_, m1, m2);
-
-            _U384.modexpAssignTo(call_, x1_, m1, 2);
-            _U384.modsubAssign(x1_, x2_, p_);
-            _U384.modsubAssign(x1_, x2_, p_);
-
-            _U384.modsubAssignTo(y1_, x2_, x1_, p_);
-            _U384.modmulAssign(call_, y1_, m1);
-            _U384.modsubAssign(y1_, y2_, p_);
-
-            if (_U384.eqUint256(y1_, 0)) {
-                return (0, 0);
-            }
-
-            _U384.modexpAssignTo(call_, m1, x1_, 2);
-            _U384.modmulAssign(call_, m1, three_);
-            _U384.modaddAssign(m1, a_, p_);
-
-            _U384.modshl1AssignTo(m2, y1_, p_);
-            _U384.moddivAssign(call_, m1, m2);
-
-            _U384.modexpAssignTo(call_, x2_, m1, 2);
-            _U384.modsubAssign(x2_, x1_, p_);
-            _U384.modsubAssign(x2_, x1_, p_);
-
-            _U384.modsubAssignTo(y2_, x1_, x2_, p_);
-            _U384.modmulAssign(call_, y2_, m1);
-            _U384.modsubAssign(y2_, y1_, p_);
-        }
-    }
-
-    /**
-     * @dev Add two elliptic curve points in affine coordinates.
-     */
-    function _addAffine(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 x1_,
-        uint256 y1_,
-        uint256 x2_,
-        uint256 y2_
-    ) private view returns (uint256 x3, uint256 y3) {
-        unchecked {
-            if (x1_ == 0 || x2_ == 0) {
-                if (x1_ == 0 && x2_ == 0) {
-                    return (0, 0);
-                }
-
-                return
-                    x1_ == 0
-                        ? (_U384.copy(x2_), _U384.copy(y2_))
-                        : (_U384.copy(x1_), _U384.copy(y1_));
-            }
-
-            if (_U384.eq(x1_, x2_)) {
-                if (_U384.eq(y1_, y2_)) {
-                    return _twiceAffine(call_, p_, three_, a_, x1_, y1_);
-                }
-
-                return (0, 0);
-            }
-
-            uint256 m1_ = _U384.modsub(y1_, y2_, p_);
-            uint256 m2_ = _U384.modsub(x1_, x2_, p_);
-
-            _U384.moddivAssign(call_, m1_, m2_);
-
-            x3 = _U384.modexp(call_, m1_, 2);
-            _U384.modsubAssign(x3, x1_, p_);
-            _U384.modsubAssign(x3, x2_, p_);
-
-            y3 = _U384.modsub(x1_, x3, p_);
-            _U384.modmulAssign(call_, y3, m1_);
-            _U384.modsubAssign(y3, y1_, p_);
-        }
-    }
-
-    function _precomputePointsTable(
-        uint256 call_,
-        uint256 p_,
-        uint256 three_,
-        uint256 a_,
-        uint256 hx_,
-        uint256 hy_,
-        uint256 gx_,
-        uint256 gy_
-    ) private view returns (uint256[2][64] memory points_) {
-        unchecked {
-            (points_[0x01][0], points_[0x01][1]) = (_U384.copy(hx_), _U384.copy(hy_));
-            (points_[0x08][0], points_[0x08][1]) = (_U384.copy(gx_), _U384.copy(gy_));
-
-            for (uint256 i = 0; i < 8; ++i) {
-                for (uint256 j = 0; j < 8; ++j) {
-                    if (i + j < 2) {
-                        continue;
-                    }
-
-                    uint256[2] memory pointTo_ = points_[(i << 3) | j];
-
-                    if (i != 0) {
-                        uint256[2] memory pointFrom_ = points_[((i - 1) << 3) | j];
-
-                        (pointTo_[0], pointTo_[1]) = _addAffine(
-                            call_,
-                            p_,
-                            three_,
-                            a_,
-                            pointFrom_[0],
-                            pointFrom_[1],
-                            gx_,
-                            gy_
-                        );
-                    } else {
-                        uint256[2] memory pointFrom_ = points_[(i << 3) | (j - 1)];
-
-                        (pointTo_[0], pointTo_[1]) = _addAffine(
-                            call_,
-                            p_,
-                            three_,
-                            a_,
-                            pointFrom_[0],
-                            pointFrom_[1],
-                            hx_,
-                            hy_
-                        );
-                    }
-                }
-            }
-
-            return points_;
-        }
-    }
-
-    /**
-     * @dev Convert 96 bytes to two 384-bit unsigned integers.
-     */
-    function _u384FromBytes2(bytes memory bytes_) private view returns (uint256, uint256) {
-        unchecked {
-            bytes memory lhs_ = new bytes(48);
-            bytes memory rhs_ = new bytes(48);
-
-            MemoryUtils.unsafeCopy(bytes_.getDataPointer(), lhs_.getDataPointer(), 48);
-            MemoryUtils.unsafeCopy(bytes_.getDataPointer() + 48, rhs_.getDataPointer(), 48);
-
-            return (_U384.fromBytes(lhs_), _U384.fromBytes(rhs_));
-        }
-    }
-}
diff --git a/contracts/libs/crypto/opt/U384.sol b/contracts/libs/crypto/opt/U384.sol
deleted file mode 100644
index 31cddb7f..00000000
--- a/contracts/libs/crypto/opt/U384.sol
+++ /dev/null
@@ -1,498 +0,0 @@
-// SPDX-License-Identifier: MIT
-pragma solidity ^0.8.4;
-
-/**
- * @notice Low-level utility library that implements unsigned 384-bit arithmetics.
- *
- * Serves for internal purposes only.
- */
-library _U384 {
-    uint256 private constant _UINT384_ALLOCATION = 64;
-    uint256 private constant _CALL_ALLOCATION = 4 * 288;
-    uint256 private constant _MUL_OFFSET = 288;
-    uint256 private constant _EXP_OFFSET = 2 * 288;
-    uint256 private constant _INV_OFFSET = 3 * 288;
-
-    function initCall(uint256 m_) internal pure returns (uint256 handler_) {
-        unchecked {
-            handler_ = _allocate(_CALL_ALLOCATION);
-
-            _sub(m_, fromUint256(2), handler_ + _INV_OFFSET + 0xA0);
-
-            assembly {
-                let call_ := add(handler_, _MUL_OFFSET)
-
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, _EXP_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), mload(m_))
-                mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-                call_ := add(handler_, _INV_OFFSET)
-
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-            }
-        }
-    }
-
-    function fromUint256(uint256 u256_) internal pure returns (uint256 handler_) {
-        unchecked {
-            handler_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0x00)
-                mstore(add(handler_, 0x20), u256_)
-            }
-        }
-    }
-
-    function fromBytes(bytes memory bytes_) internal view returns (uint256 handler_) {
-        unchecked {
-            assert(bytes_.length < 49);
-
-            handler_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                mstore(handler_, 0)
-                mstore(add(handler_, 0x20), 0)
-
-                let size_ := mload(bytes_)
-                pop(
-                    staticcall(
-                        gas(),
-                        0x4,
-                        add(bytes_, 0x20),
-                        size_,
-                        add(handler_, sub(0x40, size_)),
-                        size_
-                    )
-                )
-            }
-        }
-    }
-
-    function copy(uint256 handler_) internal pure returns (uint256 handlerCopy_) {
-        unchecked {
-            handlerCopy_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                mstore(handlerCopy_, mload(handler_))
-                mstore(add(handlerCopy_, 0x20), mload(add(handler_, 0x20)))
-            }
-
-            return handlerCopy_;
-        }
-    }
-
-    function eq(uint256 a_, uint256 b_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), mload(b_)), eq(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
-        }
-    }
-
-    function eqUint256(uint256 a_, uint256 bInteger_) internal pure returns (bool eq_) {
-        assembly {
-            eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), bInteger_))
-        }
-    }
-
-    function cmp(uint256 a_, uint256 b_) internal pure returns (int256) {
-        unchecked {
-            uint256 aWord_;
-            uint256 bWord_;
-
-            assembly {
-                aWord_ := mload(a_)
-                bWord_ := mload(b_)
-            }
-
-            if (aWord_ > bWord_) {
-                return 1;
-            }
-
-            if (aWord_ < bWord_) {
-                return -1;
-            }
-
-            assembly {
-                aWord_ := mload(add(a_, 0x20))
-                bWord_ := mload(add(b_, 0x20))
-            }
-
-            if (aWord_ > bWord_) {
-                return 1;
-            }
-
-            if (aWord_ < bWord_) {
-                return -1;
-            }
-
-            return 0;
-        }
-    }
-
-    function modAssign(uint256 call_, uint256 a_, uint256 m_) internal view {
-        assembly {
-            mstore(call_, 0x40)
-            mstore(add(0x20, call_), 0x20)
-            mstore(add(0x40, call_), 0x40)
-            mstore(add(0x60, call_), mload(a_))
-            mstore(add(0x80, call_), mload(add(a_, 0x20)))
-            mstore(add(0xA0, call_), 0x01)
-            mstore(add(0xC0, call_), mload(m_))
-            mstore(add(0xE0, call_), mload(add(m_, 0x20)))
-
-            pop(staticcall(gas(), 0x5, call_, 0x0100, a_, 0x40))
-        }
-    }
-
-    function modexp(
-        uint256 call_,
-        uint256 b_,
-        uint256 eInteger_
-    ) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            assembly {
-                call_ := add(call_, _EXP_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xA0, call_), eInteger_)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
-            }
-
-            return r_;
-        }
-    }
-
-    function modexpAssignTo(
-        uint256 call_,
-        uint256 to_,
-        uint256 b_,
-        uint256 eInteger_
-    ) internal view {
-        assembly {
-            call_ := add(call_, _EXP_OFFSET)
-
-            mstore(add(0x60, call_), mload(b_))
-            mstore(add(0x80, call_), mload(add(b_, 0x20)))
-            mstore(add(0xA0, call_), eInteger_)
-
-            pop(staticcall(gas(), 0x5, call_, 0x0100, to_, 0x40))
-        }
-    }
-
-    function modadd(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _add(a_, b_, r_);
-
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
-
-            return r_;
-        }
-    }
-
-    function modaddAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            _addTo(a_, b_);
-
-            if (cmp(a_, m_) >= 0) {
-                return _subFrom(a_, m_);
-            }
-        }
-    }
-
-    function modmul(uint256 call_, uint256 a_, uint256 b_) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60);
-
-            assembly {
-                call_ := add(call_, _MUL_OFFSET)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-
-            return r_;
-        }
-    }
-
-    function modmulAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
-        unchecked {
-            _mul(a_, b_, call_ + _MUL_OFFSET + 0x60);
-
-            assembly {
-                call_ := add(call_, _MUL_OFFSET)
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, a_, 0x40))
-            }
-        }
-    }
-
-    function modsub(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, r_);
-                return r_;
-            }
-
-            _add(a_, m_, r_);
-            _subFrom(r_, b_);
-        }
-    }
-
-    function modsubAssign(uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _subFrom(a_, b_);
-                return;
-            }
-
-            _addTo(a_, m_);
-            _subFrom(a_, b_);
-        }
-    }
-
-    function modsubAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure {
-        unchecked {
-            if (cmp(a_, b_) >= 0) {
-                _sub(a_, b_, to_);
-                return;
-            }
-
-            _add(a_, m_, to_);
-            _subFrom(to_, b_);
-        }
-    }
-
-    function modshl1(uint256 a_, uint256 m_) internal pure returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _shl1(a_, r_);
-
-            if (cmp(r_, m_) >= 0) {
-                _subFrom(r_, m_);
-            }
-
-            return r_;
-        }
-    }
-
-    function modshl1AssignTo(uint256 to_, uint256 a_, uint256 m_) internal pure {
-        unchecked {
-            _shl1(a_, to_);
-
-            if (cmp(to_, m_) >= 0) {
-                _subFrom(to_, m_);
-            }
-        }
-    }
-
-    /// @dev Stores modinv into `b_` and moddiv into `a_`.
-    function moddivAssign(uint256 call_, uint256 a_, uint256 b_) internal view {
-        unchecked {
-            assembly {
-                call_ := add(call_, _INV_OFFSET)
-
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40))
-            }
-
-            modmulAssign(call_ - _INV_OFFSET, a_, b_);
-        }
-    }
-
-    function moddiv(
-        uint256 call_,
-        uint256 a_,
-        uint256 b_,
-        uint256 m_
-    ) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = modinv(call_, b_, m_);
-
-            _mul(a_, r_, call_ + 0x60);
-
-            assembly {
-                mstore(call_, 0x60)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xC0, call_), 0x01)
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-        }
-    }
-
-    function modinv(uint256 call_, uint256 b_, uint256 m_) internal view returns (uint256 r_) {
-        unchecked {
-            r_ = _allocate(_UINT384_ALLOCATION);
-
-            _sub(m_, fromUint256(2), call_ + 0xA0);
-
-            assembly {
-                mstore(call_, 0x40)
-                mstore(add(0x20, call_), 0x40)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0x60, call_), mload(b_))
-                mstore(add(0x80, call_), mload(add(b_, 0x20)))
-                mstore(add(0xE0, call_), mload(m_))
-                mstore(add(0x0100, call_), mload(add(m_, 0x20)))
-
-                pop(staticcall(gas(), 0x5, call_, 0x0120, r_, 0x40))
-            }
-        }
-    }
-
-    function _shl1(uint256 a_, uint256 r_) internal pure {
-        assembly {
-            let a1_ := mload(add(a_, 0x20))
-
-            mstore(r_, or(shl(1, mload(a_)), shr(255, a1_)))
-            mstore(add(r_, 0x20), shl(1, a1_))
-        }
-    }
-
-    function _add(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(r_, 0x20), sum_)
-
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
-
-            mstore(r_, sum_)
-        }
-    }
-
-    function _sub(uint256 a_, uint256 b_, uint256 r_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(r_, 0x20), diff_)
-
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
-
-            mstore(r_, diff_)
-        }
-    }
-
-    function _subFrom(uint256 a_, uint256 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let diff_ := sub(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(a_, 0x20), diff_)
-
-            diff_ := gt(diff_, aWord_)
-            diff_ := sub(sub(mload(a_), mload(b_)), diff_)
-
-            mstore(a_, diff_)
-        }
-    }
-
-    function _addTo(uint256 a_, uint256 b_) private pure {
-        assembly {
-            let aWord_ := mload(add(a_, 0x20))
-            let sum_ := add(aWord_, mload(add(b_, 0x20)))
-
-            mstore(add(a_, 0x20), sum_)
-
-            sum_ := gt(aWord_, sum_)
-            sum_ := add(sum_, add(mload(a_), mload(b_)))
-
-            mstore(a_, sum_)
-        }
-    }
-
-    function _mul(uint256 a_, uint256 b_, uint256 r_) private pure {
-        unchecked {
-            assembly {
-                let a0_ := mload(a_)
-                let a1_ := mload(add(a_, 0x20))
-                let b0_ := mload(b_)
-                let b1_ := mload(add(b_, 0x20))
-
-                let mm_ := mulmod(
-                    a1_,
-                    b1_,
-                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-                )
-                let c3_ := mul(a1_, b1_)
-                let c2_ := sub(sub(mm_, c3_), lt(mm_, c3_))
-
-                mm_ := mulmod(
-                    a0_,
-                    b1_,
-                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-                )
-                let prod1_ := mul(a0_, b1_)
-                let prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
-
-                c2_ := add(c2_, prod1_)
-                let c1_ := lt(c2_, prod1_)
-                c1_ := add(c1_, prod0_)
-
-                mm_ := mulmod(
-                    a1_,
-                    b0_,
-                    0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-                )
-                prod1_ := mul(a1_, b0_)
-                prod0_ := sub(sub(mm_, prod1_), lt(mm_, prod1_))
-
-                c2_ := add(c2_, prod1_)
-                c1_ := add(c1_, lt(c2_, prod1_))
-                c1_ := add(c1_, prod0_)
-                c1_ := add(c1_, mul(a0_, b0_))
-
-                mstore(add(r_, 0x40), c3_)
-                mstore(add(r_, 0x20), c2_)
-                mstore(r_, c1_)
-            }
-        }
-    }
-
-    function _allocate(uint256 bytes_) private pure returns (uint256 handler_) {
-        unchecked {
-            assembly {
-                handler_ := mload(0x40)
-                mstore(0x40, add(handler_, bytes_))
-            }
-
-            return handler_;
-        }
-    }
-}

From b3cffd6623eaabddbb42eef69110936b5aa3ac1b Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Thu, 23 Jan 2025 19:15:41 +0200
Subject: [PATCH 25/42] add tests for U512

---
 contracts/mock/libs/bn/U512Mock.sol | 501 ++++++++++++++++++++++++++++
 test/libs/bn/U512.test.ts           | 216 +++++++++++-
 2 files changed, 715 insertions(+), 2 deletions(-)

diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 7c69ca31..74737057 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -8,6 +8,241 @@ import {U512} from "../../../libs/bn/U512.sol";
 contract U512Mock {
     using U512 for *;
 
+    function copy(
+        uint256 u256_
+    )
+        external
+        view
+        returns (
+            uint512 pointerOriginal_,
+            uint512 pointerCopy_,
+            bytes memory valueOriginal_,
+            bytes memory valueCopy_
+        )
+    {
+        pointerOriginal_ = U512.fromUint256(u256_);
+        valueOriginal_ = U512.toBytes(pointerOriginal_);
+
+        pointerCopy_ = U512.copy(pointerOriginal_);
+        valueCopy_ = U512.toBytes(pointerCopy_);
+    }
+
+    function isNull(uint512 pointer_) external view returns (bool isNull_) {
+        return U512.isNull(pointer_);
+    }
+
+    function eq(bytes memory aBytes_, bytes memory bBytes_) external view returns (bool eq_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.eq(a_, b_);
+    }
+
+    function eqUint256(bytes memory aBytes_, uint256 u256_) external view returns (bool eq_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        return U512.eqUint256(a_, u256_);
+    }
+
+    function cmp(bytes memory aBytes_, bytes memory bBytes_) external view returns (int256) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.cmp(a_, b_);
+    }
+
+    function mod(
+        bytes memory aBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.mod(call_, a_, m_).toBytes();
+    }
+
+    function modAssign(
+        bytes memory aBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modAssign(call_, a_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modAssignTo(
+        bytes memory aBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modAssignTo(call_, a_, m_, to_);
+
+        return to_.toBytes();
+    }
+
+    function modinv(
+        bytes memory aBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modinv(call_, a_, m_).toBytes();
+    }
+
+    function modinvAssign(
+        bytes memory aBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modinvAssign(call_, a_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modinvAssignTo(
+        bytes memory aBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modinvAssignTo(call_, a_, m_, to_);
+
+        return to_.toBytes();
+    }
+
+    function add(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.add(a_, b_).toBytes();
+    }
+
+    function addAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        U512.addAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function addAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.addAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
+    function sub(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.sub(a_, b_).toBytes();
+    }
+
+    function subAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        U512.subAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function subAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.subAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
+    function mul(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.mul(a_, b_).toBytes();
+    }
+
+    function mulAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        U512.mulAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function mulAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.mulAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
     function modadd(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -22,6 +257,88 @@ contract U512Mock {
         return U512.modadd(call_, a_, b_, m_).toBytes();
     }
 
+    function modaddAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modaddAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modaddAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modaddAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
+    function redadd(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.redadd(call_, a_, b_, m_).toBytes();
+    }
+
+    function redaddAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.redaddAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function redaddAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.redaddAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
     function modsub(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -36,6 +353,88 @@ contract U512Mock {
         return U512.modsub(call_, a_, b_, m_).toBytes();
     }
 
+    function modsubAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modsubAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modsubAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modsubAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
+    function redsub(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.redsub(call_, a_, b_, m_).toBytes();
+    }
+
+    function redsubAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.redsubAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function redsubAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.redsubAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
     function modmul(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -50,6 +449,40 @@ contract U512Mock {
         return U512.modmul(call_, a_, b_, m_).toBytes();
     }
 
+    function modmulAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modmulAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modmulAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modmulAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
     function modexp(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -64,6 +497,40 @@ contract U512Mock {
         return U512.modexp(call_, a_, b_, m_).toBytes();
     }
 
+    function modexpAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modexpAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modexpAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modexpAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
     function moddiv(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -77,4 +544,38 @@ contract U512Mock {
 
         return U512.moddiv(call_, a_, b_, m_).toBytes();
     }
+
+    function moddivAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.moddivAssign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function moddivAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.moddivAssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
 }
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index 90a626a7..d0c024f8 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,9 +4,11 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe("U512", () => {
+describe.only("U512", () => {
   const reverter = new Reverter();
 
+  const prime = 76884956397045344220809746629001649092737531784414529538755519063063536359079n;
+
   let u512: U512Mock;
 
   function randomU512(): string {
@@ -17,6 +19,47 @@ describe("U512", () => {
     return "0x" + value.toString(16).padStart(128, "0");
   }
 
+  function mod(a: string, m: string): string {
+    return toBytes(ethers.toBigInt(a) % ethers.toBigInt(m));
+  }
+
+  function add(a: string, b: string): string {
+    const maxUint512 = BigInt(1) << BigInt(512);
+
+    const aBigInt = ethers.toBigInt(a);
+    const bBigInt = ethers.toBigInt(b);
+
+    const result = (aBigInt + bBigInt) % maxUint512;
+
+    return toBytes(result);
+  }
+
+  function sub(a: string, b: string): string {
+    const maxUint512 = BigInt(1) << BigInt(512);
+
+    const aBigInt = ethers.toBigInt(a);
+    const bBigInt = ethers.toBigInt(b);
+
+    let result = (aBigInt - bBigInt) % maxUint512;
+
+    if (result < 0) {
+      result += maxUint512;
+    }
+
+    return toBytes(result);
+  }
+
+  function mul(a: string, b: string): string {
+    const maxUint512 = BigInt(1) << BigInt(512);
+
+    const aBigInt = ethers.toBigInt(a);
+    const bBigInt = ethers.toBigInt(b);
+
+    const result = (aBigInt * bBigInt) % maxUint512;
+
+    return toBytes(result);
+  }
+
   function modadd(a: string, b: string, m: string): string {
     return toBytes((ethers.toBigInt(a) + ethers.toBigInt(b)) % ethers.toBigInt(m));
   }
@@ -29,6 +72,34 @@ describe("U512", () => {
     return toBytes(ethers.toBigInt(a) ** ethers.toBigInt(b) % ethers.toBigInt(m));
   }
 
+  function modinv(a: string, m: string): string {
+    const aBigInt = ethers.toBigInt(a);
+    const mBigInt = ethers.toBigInt(m);
+
+    if (aBigInt <= 0n || mBigInt <= 0n) {
+      throw new Error("Inputs must be positive integers.");
+    }
+
+    let [t, newT] = [0n, 1n];
+    let [r, newR] = [mBigInt, aBigInt];
+
+    while (newR !== 0n) {
+      const quotient = r / newR;
+      [t, newT] = [newT, t - quotient * newT];
+      [r, newR] = [newR, r - quotient * newR];
+    }
+
+    if (r > 1n) {
+      throw new Error("No modular inverse exists.");
+    }
+
+    if (t < 0n) {
+      t += mBigInt;
+    }
+
+    return toBytes(t);
+  }
+
   function modsub(a: string, b: string, m: string): string {
     const aBn = ethers.toBigInt(a);
     const bBn = ethers.toBigInt(b);
@@ -47,13 +118,125 @@ describe("U512", () => {
 
   afterEach(reverter.revert);
 
+  it("copy test", async () => {
+    const [pointerOriginal, pointerCopy, valueOriginal, valueCopy] = await u512.copy(prime);
+
+    expect(pointerOriginal).to.be.lessThan(pointerCopy);
+    expect(valueOriginal).to.be.equal(valueCopy);
+  });
+
+  it("isNull test", async () => {
+    expect(await u512.isNull(0)).to.be.true;
+    expect(await u512.isNull(64)).to.be.false;
+  });
+
+  it("eq test", async () => {
+    expect(await u512.eq(toBytes(1020n), toBytes(1002n))).to.be.false;
+    expect(await u512.eq(toBytes(200n), toBytes(200n))).to.be.true;
+    expect(await u512.eq("0x00", "0x00")).to.be.true;
+  });
+
+  it("eqUint256 test", async () => {
+    expect(await u512.eqUint256(toBytes(1020n), 1002n)).to.be.false;
+    expect(await u512.eqUint256(toBytes(200n), 200n)).to.be.true;
+    expect(await u512.eqUint256("0x00", 0)).to.be.true;
+  });
+
+  it("cmp test", async () => {
+    expect(await u512.cmp(toBytes(705493n), toBytes(705492n))).to.be.equal(1);
+    expect(await u512.cmp(toBytes(1n), "0x00")).to.be.equal(1);
+    expect(await u512.cmp(toBytes(775n), toBytes(775n))).to.be.equal(0);
+    expect(await u512.cmp("0x00", "0x00")).to.be.equal(0);
+    expect(await u512.cmp(toBytes(380n), toBytes(400n))).to.be.equal(-1);
+    expect(await u512.cmp("0x00", toBytes(12n))).to.be.equal(-1);
+  });
+
+  it("mod test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const m = randomU512();
+      const to = randomU512();
+
+      expect(await u512.mod(a, m)).to.be.equal(mod(a, m));
+      expect(await u512.modAssign(a, m)).to.be.equal(mod(a, m));
+      expect(await u512.modAssignTo(a, m, to)).to.be.equal(mod(a, m));
+    }
+  });
+
+  it("modinv test", async () => {
+    const m = toBytes(prime);
+
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const to = randomU512();
+
+      expect(await u512.modinv(a, m)).to.be.equal(modinv(a, m));
+      expect(await u512.modinvAssign(a, m)).to.be.equal(modinv(a, m));
+      expect(await u512.modinvAssignTo(a, m, to)).to.be.equal(modinv(a, m));
+    }
+  });
+
+  it("add test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
+
+      expect(await u512.add(a, b)).to.be.equal(add(a, b));
+      expect(await u512.addAssign(a, b)).to.be.equal(add(a, b));
+      expect(await u512.addAssignTo(a, b, to)).to.be.equal(add(a, b));
+    }
+  });
+
+  it("sub test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
+
+      expect(await u512.sub(a, b)).to.be.equal(sub(a, b));
+      expect(await u512.subAssign(a, b)).to.be.equal(sub(a, b));
+      expect(await u512.subAssignTo(a, b, to)).to.be.equal(sub(a, b));
+    }
+  });
+
+  it("mul test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
+
+      expect(await u512.mul(a, b)).to.be.equal(mul(a, b));
+      expect(await u512.mulAssign(a, b)).to.be.equal(mul(a, b));
+      expect(await u512.mulAssignTo(a, b, to)).to.be.equal(mul(a, b));
+    }
+  });
+
   it("modadd test", async () => {
     for (let i = 0; i < 100; ++i) {
       const a = randomU512();
       const b = randomU512();
       const m = randomU512();
+      const to = randomU512();
 
       expect(await u512.modadd(a, b, m)).to.equal(modadd(a, b, m));
+      expect(await u512.modaddAssign(a, b, m)).to.equal(modadd(a, b, m));
+      expect(await u512.modaddAssignTo(a, b, m, to)).to.equal(modadd(a, b, m));
+    }
+  });
+
+  it("redadd test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const m = randomU512();
+
+      const a = mod(randomU512(), m);
+      const b = mod(randomU512(), m);
+
+      const to = randomU512();
+
+      expect(await u512.redadd(a, b, m)).to.equal(modadd(a, b, m));
+      expect(await u512.redaddAssign(a, b, m)).to.equal(modadd(a, b, m));
+      expect(await u512.redaddAssignTo(a, b, m, to)).to.equal(modadd(a, b, m));
     }
   });
 
@@ -62,8 +245,11 @@ describe("U512", () => {
       const a = randomU512();
       const b = randomU512();
       const m = randomU512();
+      const to = randomU512();
 
       expect(await u512.modmul(a, b, m)).to.equal(modmul(a, b, m));
+      expect(await u512.modmulAssign(a, b, m)).to.equal(modmul(a, b, m));
+      expect(await u512.modmulAssignTo(a, b, m, to)).to.equal(modmul(a, b, m));
     }
   });
 
@@ -72,8 +258,26 @@ describe("U512", () => {
       const a = randomU512();
       const b = randomU512();
       const m = randomU512();
+      const to = randomU512();
 
       expect(await u512.modsub(a, b, m)).to.equal(modsub(a, b, m));
+      expect(await u512.modsubAssign(a, b, m)).to.equal(modsub(a, b, m));
+      expect(await u512.modsubAssignTo(a, b, m, to)).to.equal(modsub(a, b, m));
+    }
+  });
+
+  it("redsub test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const m = randomU512();
+
+      const a = mod(randomU512(), m);
+      const b = mod(randomU512(), m);
+
+      const to = randomU512();
+
+      expect(await u512.redsub(a, b, m)).to.equal(modsub(a, b, m));
+      expect(await u512.redsubAssign(a, b, m)).to.equal(modsub(a, b, m));
+      expect(await u512.redsubAssignTo(a, b, m, to)).to.equal(modsub(a, b, m));
     }
   });
 
@@ -82,18 +286,26 @@ describe("U512", () => {
       const a = randomU512();
       const b = toBytes(100n);
       const m = randomU512();
+      const to = randomU512();
 
       expect(await u512.modexp(a, b, m)).to.equal(modexp(a, b, m));
+      expect(await u512.modexpAssign(a, b, m)).to.equal(modexp(a, b, m));
+      expect(await u512.modexpAssignTo(a, b, m, to)).to.equal(modexp(a, b, m));
     }
   });
 
   it("moddiv test", async () => {
+    const m = toBytes(prime);
+
     const a = toBytes(779149564533142355434093157610126726613246737199n);
     const b = toBytes(29118654464229156312755475164902924590603964377702716942232927993582928167089n);
-    const m = toBytes(76884956397045344220809746629001649092737531784414529538755519063063536359079n);
+
+    const to = randomU512();
 
     const expected = toBytes(30823410400962253491978005949535646087432096635784775122170630924100507445065n);
 
     expect(await u512.moddiv(a, b, m)).to.equal(expected);
+    expect(await u512.moddivAssign(a, b, m)).to.equal(expected);
+    expect(await u512.moddivAssignTo(a, b, m, to)).to.equal(expected);
   });
 });

From 5df79f0fe543a40259faab8409e5d5758cb6b94c Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Fri, 24 Jan 2025 14:58:03 +0200
Subject: [PATCH 26/42] add natspec

---
 contracts/libs/bn/U512.sol          | 391 ++++++++++++++++++++++++++++
 contracts/mock/libs/bn/U512Mock.sol |   4 +-
 package.json                        |   2 +-
 test/libs/bn/U512.test.ts           |   2 +-
 test/libs/crypto/ECDSA384.test.ts   |   2 +-
 test/libs/crypto/ECDSA512.test.ts   |   2 +-
 6 files changed, 397 insertions(+), 6 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 65bf8e24..3af41808 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -12,12 +12,21 @@ library U512 {
     uint256 private constant _BYTES_ALLOCATION = 96;
     uint256 private constant _CALL_ALLOCATION = 384;
 
+    /**
+     * @notice Initializes a memory pointer for precompile call arguments.
+     * @return call_ A memory pointer for precompile operations.
+     */
     function initCall() internal pure returns (call call_) {
         unchecked {
             call_ = call.wrap(_allocate(_CALL_ALLOCATION));
         }
     }
 
+    /**
+     * @notice Converts a 256-bit unsigned integer to a 512-bit unsigned integer.
+     * @param u256_ The 256-bit unsigned integer to convert.
+     * @return u512_ The 512-bit representation of the input.
+     */
     function fromUint256(uint256 u256_) internal pure returns (uint512 u512_) {
         unchecked {
             u512_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -29,6 +38,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Converts a byte array to a 512-bit unsigned integer.
+     * @dev The byte array must be at most 64 bytes long.
+     * @param bytes_ The byte array to convert.
+     * @return u512_ The 512-bit representation of the byte array.
+     */
     function fromBytes(bytes memory bytes_) internal view returns (uint512 u512_) {
         unchecked {
             assert(bytes_.length < 65);
@@ -54,6 +69,11 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Copies a 512-bit unsigned integer to a new memory location.
+     * @param u512_ The 512-bit unsigned integer to copy.
+     * @return u512Copy_ A pointer to the new copy of the 512-bit unsigned integer.
+     */
     function copy(uint512 u512_) internal pure returns (uint512 u512Copy_) {
         unchecked {
             u512Copy_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -65,6 +85,11 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Converts a 512-bit unsigned integer to a byte array.
+     * @param u512_ The 512-bit unsigned integer to convert.
+     * @return bytes_ A byte array representation of the 512-bit unsigned integer.
+     */
     function toBytes(uint512 u512_) internal pure returns (bytes memory bytes_) {
         unchecked {
             uint256 handler_ = _allocate(_BYTES_ALLOCATION);
@@ -79,6 +104,11 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Checks if a uint512 pointer is null.
+     * @param u512_ The uint512 pointer to check.
+     * @return isNull_ True if the pointer is null, false otherwise.
+     */
     function isNull(uint512 u512_) internal pure returns (bool isNull_) {
         unchecked {
             assembly {
@@ -87,6 +117,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Compares two 512-bit unsigned integers for equality.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @return eq_ True if the integers are equal, false otherwise.
+     */
     function eq(uint512 a_, uint512 b_) internal pure returns (bool eq_) {
         unchecked {
             assembly {
@@ -98,6 +134,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Compares a 512-bit unsigned integer with a 256-bit unsigned integer for equality.
+     * @param a_ The 512-bit unsigned integer.
+     * @param u256_ The 256-bit unsigned integer.
+     * @return eq_ True if the integers are equal, false otherwise.
+     */
     function eqUint256(uint512 a_, uint256 u256_) internal pure returns (bool eq_) {
         unchecked {
             assembly {
@@ -106,6 +148,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Compares two 512-bit unsigned integers.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @return 1 if `a_ > b_`, -1 if `a_ < b_`, and 0 if they are equal.
+     */
     function cmp(uint512 a_, uint512 b_) internal pure returns (int256) {
         unchecked {
             uint256 aWord_;
@@ -141,6 +189,13 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Computes the remainder of a 512-bit unsigned integer modulo `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The dividend.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular operation `(a_ % m_)`.
+     */
     function mod(call call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -149,18 +204,41 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Reduces a 512-bit unsigned integer modulo `m_` in place.
+     * @dev Updates the value of `a_` to `(a_ % m_)`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The dividend.
+     * @param m_ The modulus.
+     */
     function modAssign(call call_, uint512 a_, uint512 m_) internal view {
         unchecked {
             _mod(call_, a_, m_, a_);
         }
     }
 
+    /**
+     * @notice Computes `a_ % m_` and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result `(a_ % m_)` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The dividend.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function modAssignTo(call call_, uint512 a_, uint512 m_, uint512 to_) internal view {
         unchecked {
             _mod(call_, a_, m_, to_);
         }
     }
 
+    /**
+     * @notice Computes the modular inverse of a 512-bit unsigned integer.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The 512-bit unsigned integer to invert.
+     * @param m_ The modulus.
+     * @return r_ The modular inverse result `a_^(-1) % m_`.
+     */
     function modinv(call call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -169,18 +247,43 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs the modular inverse assignment on a 512-bit unsigned integer.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Updates the value of `a_` to `a_^(-1) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The 512-bit unsigned integer to invert.
+     * @param m_ The modulus.
+     */
     function modinvAssign(call call_, uint512 a_, uint512 m_) internal view {
         unchecked {
             _modinv(call_, a_, m_, a_);
         }
     }
 
+    /**
+     * @notice Computes the modular inverse and stores it in a separate 512-bit unsigned integer.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Assigns the result of `a_^(-1) % m_` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The 512-bit unsigned integer to invert.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function modinvAssignTo(call call_, uint512 a_, uint512 m_, uint512 to_) internal view {
         unchecked {
             _modinv(call_, a_, m_, to_);
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation on 512-bit unsigned integers.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     * @return r_ The result of modular exponentiation `(b_^e_) % m_`.
+     */
     function modexp(
         call call_,
         uint512 b_,
@@ -194,12 +297,29 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation assignment on the base.
+     * @dev Updates the value of `b_` to `(b_^e_) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     */
     function modexpAssign(call call_, uint512 b_, uint512 e_, uint512 m_) internal view {
         unchecked {
             _modexp(call_, b_, e_, m_, b_);
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `(b_^e_) % m_` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function modexpAssignTo(
         call call_,
         uint512 b_,
@@ -212,6 +332,14 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Adds two 512-bit unsigned integers under a modulus.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular addition `(a_ + b_) % m_`.
+     */
     function modadd(
         call call_,
         uint512 a_,
@@ -225,12 +353,29 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular addition assignment on the first 512-bit unsigned integer addend.
+     * @dev Updates the value of `a_` to `(a_ + b_) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     * @param m_ The modulus.
+     */
     function modaddAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _modadd(call_, a_, b_, m_, a_);
         }
     }
 
+    /**
+     * @notice Performs modular addition and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `(a_ + b_) % m_` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function modaddAssignTo(
         call call_,
         uint512 a_,
@@ -243,6 +388,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Adds two 512-bit unsigned integers.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     * @return r_ The result of the addition.
+     */
     function add(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -251,18 +402,40 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs addition assignment on the first 512-bit unsigned addend.
+     * @dev Updates the value of `a_` to `a_ + b_`.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     */
     function addAssign(uint512 a_, uint512 b_) internal pure {
         unchecked {
             _add(a_, b_, a_);
         }
     }
 
+    /**
+     * @notice Performs addition and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `a_ + b_` to `to_`.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function addAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
         unchecked {
             _add(a_, b_, to_);
         }
     }
 
+    /**
+     * @notice Adds two 512-bit unsigned integers under a modulus.
+     * @dev This is an optimized version of `modadd` where the inputs must be pre-reduced by `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first addend, reduced by `m_`.
+     * @param b_ The second addend, reduced by `m_`.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular addition `(a_ + b_) % m_`.
+     */
     function redadd(
         call call_,
         uint512 a_,
@@ -276,12 +449,29 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular addition assignment on the first 512-bit unsigned integer addend.
+     * @dev This is an optimized version of `modaddAssign` where the inputs must be pre-reduced by `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first addend, reduced by `m_`.
+     * @param b_ The second addend, reduced by `m_`.
+     * @param m_ The modulus.
+     */
     function redaddAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
         unchecked {
             _redadd(call_, a_, b_, m_, a_);
         }
     }
 
+    /**
+     * @notice Performs modular addition and stores the result in a separate 512-bit unsigned integer.
+     * @dev This is an optimized version of `modaddAssignTo` where the inputs must be pre-reduced by `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first addend, reduced by `m_`.
+     * @param b_ The second addend, reduced by `m_`.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function redaddAssignTo(
         call call_,
         uint512 a_,
@@ -294,6 +484,14 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Subtracts one 512-bit unsigned integer from another under a modulus.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular subtraction `(a_ - b_) % m_`.
+     */
     function modsub(
         call call_,
         uint512 a_,
@@ -307,12 +505,27 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular subtraction assignment on the 512-bit unsigned integer minuend.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     * @param m_ The modulus.
+     */
     function modsubAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _modsub(call_, a_, b_, m_, a_);
         }
     }
 
+    /**
+     * @notice Performs modular subtraction and stores the result in a separate 512-bit unsigned integer.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function modsubAssignTo(
         call call_,
         uint512 a_,
@@ -325,6 +538,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Subtracts one 512-bit unsigned integer from another.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     * @return r_ The result of the subtraction.
+     */
     function sub(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -333,18 +552,40 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs subtraction assignment on the 512-bit unsigned minuend.
+     * @dev Updates the value of `a_` to `a_ - b_`.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     */
     function subAssign(uint512 a_, uint512 b_) internal pure {
         unchecked {
             _sub(a_, b_, a_);
         }
     }
 
+    /**
+     * @notice Performs subtraction and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `a_ - b_` to `to_`.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function subAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
         unchecked {
             _sub(a_, b_, to_);
         }
     }
 
+    /**
+     * @notice Subtracts one 512-bit unsigned integer from another under a modulus.
+     * @dev This is an optimized version of `modsub` where the inputs must be pre-reduced by `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The minuend, reduced by `m_`.
+     * @param b_ The subtrahend, reduced by `m_`.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular subtraction `(a_ - b_) % m_`.
+     */
     function redsub(
         call call_,
         uint512 a_,
@@ -358,12 +599,29 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular subtraction assignment on the 512-bit unsigned integer minuend.
+     * @dev This is an optimized version of `modsubAssign` where the inputs must be pre-reduced by `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The minuend, reduced by `m_`.
+     * @param b_ The subtrahend, reduced by `m_`.
+     * @param m_ The modulus.
+     */
     function redsubAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
         unchecked {
             _redsub(call_, a_, b_, m_, a_);
         }
     }
 
+    /**
+     * @notice Performs modular subtraction and stores the result in a separate 512-bit unsigned integer.
+     * @dev This is an optimized version of `modsubAssignTo` where the inputs must be pre-reduced by `m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The minuend, reduced by `m_`.
+     * @param b_ The subtrahend, reduced by `m_`.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function redsubAssignTo(
         call call_,
         uint512 a_,
@@ -376,6 +634,14 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Multiplies two 512-bit unsigned integers under a modulus.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular multiplication `(a_ * b_) % m_`.
+     */
     function modmul(
         call call_,
         uint512 a_,
@@ -389,12 +655,29 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular multiplication assignment on the first 512-bit unsigned integer factor.
+     * @dev Updates the value of `a_` to `(a_ * b_) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     * @param m_ The modulus.
+     */
     function modmulAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _modmul(call_, a_, b_, m_, a_);
         }
     }
 
+    /**
+     * @notice Performs modular multiplication and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `(a_ * b_) % m_` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function modmulAssignTo(
         call call_,
         uint512 a_,
@@ -407,6 +690,12 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Multiplies two 512-bit unsigned integers.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     * @return r_ The result of the multiplication.
+     */
     function mul(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
@@ -415,18 +704,41 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs multiplication assignment on the first 512-bit unsigned factor.
+     * @dev Updates the value of `a_` to `a_ * b_`.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     */
     function mulAssign(uint512 a_, uint512 b_) internal pure {
         unchecked {
             _mul(a_, b_, a_);
         }
     }
 
+    /**
+     * @notice Performs multiplication and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `a_ * b_` to `to_`.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function mulAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
         unchecked {
             _mul(a_, b_, to_);
         }
     }
 
+    /**
+     * @notice Divides two 512-bit unsigned integers under a modulus.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Returns the result of `(a_ * b_^(-1)) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The dividend.
+     * @param b_ The divisor.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular division.
+     */
     function moddiv(
         call call_,
         uint512 a_,
@@ -440,12 +752,31 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs the modular division assignment on a 512-bit unsigned dividend.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Updates the value of `a_` to `(a_ * b_^(-1)) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The dividend.
+     * @param b_ The divisor.
+     * @param m_ The modulus.
+     */
     function moddivAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _moddiv(call_, a_, b_, m_, a_);
         }
     }
 
+    /**
+     * @notice Performs the modular division and stores the result in a separate 512-bit unsigned integer.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Assigns the result of `(a_ * b_^(-1)) % m_` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param a_ The dividend.
+     * @param b_ The divisor.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
     function moddivAssignTo(
         call call_,
         uint512 a_,
@@ -458,6 +789,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular reduction using the EVM precompiled contract.
+     * @dev Computes `(a_ % m_)` and stores the result in `r_`.
+     */
     function _mod(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
@@ -475,6 +810,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation using the EVM precompiled contract.
+     * @dev Computes `(a_^e_) % m_` and stores the result in `r_`.
+     */
     function _modexp(call call_, uint512 a_, uint512 e_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
@@ -493,6 +832,11 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Computes the modular inverse using the EVM precompiled contract.
+     * @dev The modulus `m_` must be a prime number.
+     * @dev Computes `a_^(-1) % m_` and stores the result in `r_`.
+     */
     function _modinv(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             uint512 buffer_ = _buffer(call_);
@@ -520,6 +864,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs addition of two 512-bit unsigned integers.
+     * @dev Computes `a_ + b_` and stores the result in `r_`.
+     */
     function _add(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
             assembly {
@@ -536,6 +884,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular addition using the EVM precompiled contract.
+     * @dev Computes `(a_ + b_) % m_` and stores the result in `r_`.
+     */
     function _modadd(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
@@ -562,6 +914,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs reduced modular addition of two 512-bit unsigned integers.
+     * @dev Computes `(a_ + b_) % m_` assuming `a_` and `b_` are already reduced by `m_`.
+     */
     function _redadd(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
         unchecked {
             uint512 buffer_ = _buffer(call_);
@@ -592,6 +948,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs subtraction of two 512-bit unsigned integers.
+     * @dev Computes `a_ - b_` and stores the result in `r_`.
+     */
     function _sub(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
             assembly {
@@ -608,6 +968,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular subtraction using the EVM precompiled contract.
+     * @dev Computes `(a_ - b_) % m_` and stores the result in `r_`.
+     */
     function _modsub(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
             int cmp_ = cmp(a_, b_);
@@ -637,6 +1001,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs reduced modular subtraction of two 512-bit unsigned integers.
+     * @dev Computes `(a_ - b_) % m_` assuming `a_` and `b_` are already reduced by `m_`.
+     */
     function _redsub(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
         unchecked {
             if (cmp(a_, b_) >= 0) {
@@ -651,6 +1019,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Multiplies two 512-bit unsigned integers.
+     * @dev Computes `a_ * b_` and stores the result in `r_`.
+     */
     function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
             assembly {
@@ -676,6 +1048,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Prepares intermediate results for modular multiplication.
+     * @dev Calculates partial products and stores them in `call_` for further processing.
+     */
     function _modmul2p(call call_, uint512 a_, uint512 b_) private pure {
         unchecked {
             assembly {
@@ -738,6 +1114,10 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular multiplication using the EVM precompiled contract.
+     * @dev Computes `(a_ * b_) % m_` and stores the result in `r_`.
+     */
     function _modmul(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
             _modmul2p(call_, a_, b_);
@@ -755,6 +1135,11 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Computes the modular division using the EVM precompiled contract.
+     * @dev The modulus `m_` must be a prime number.
+     * @dev Computes `(a_ * b_^(-1)) % m_` and stores the result in `r_`.
+     */
     function _moddiv(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) internal view {
         unchecked {
             uint512 buffer_ = _buffer(call_);
@@ -775,6 +1160,9 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Calculates a memory pointer for a buffer based on the provided `call_` pointer.
+     */
     function _buffer(call call_) private pure returns (uint512 buffer_) {
         unchecked {
             assembly {
@@ -783,6 +1171,9 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Allocates a specified amount of memory and updates the free memory pointer.
+     */
     function _allocate(uint256 bytes_) private pure returns (uint256 handler_) {
         unchecked {
             assembly {
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 74737057..9111b415 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -12,7 +12,7 @@ contract U512Mock {
         uint256 u256_
     )
         external
-        view
+        pure
         returns (
             uint512 pointerOriginal_,
             uint512 pointerCopy_,
@@ -27,7 +27,7 @@ contract U512Mock {
         valueCopy_ = U512.toBytes(pointerCopy_);
     }
 
-    function isNull(uint512 pointer_) external view returns (bool isNull_) {
+    function isNull(uint512 pointer_) external pure returns (bool isNull_) {
         return U512.isNull(pointer_);
     }
 
diff --git a/package.json b/package.json
index 1ba0c1c1..ac907dd0 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,7 @@
   "scripts": {
     "prepare": "husky",
     "compile": "npx hardhat compile --force",
-    "coverage": "npx hardhat coverage --solcoverjs ./.solcover.ts",
+    "coverage": "NODE_OPTIONS='--max-old-space-size=8192' npx hardhat coverage --solcoverjs ./.solcover.ts",
     "test": "npx hardhat test",
     "private-network": "npx hardhat node",
     "lint-fix": "npm run lint-sol-fix && npm run lint-ts-fix && npm run lint-json-fix",
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index d0c024f8..cd8ee952 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe.only("U512", () => {
+describe("U512", () => {
   const reverter = new Reverter();
 
   const prime = 76884956397045344220809746629001649092737531784414529538755519063063536359079n;
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index 9294b951..de29aaac 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -168,7 +168,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe.only("brainpoolP384r1", () => {
+  describe("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =
diff --git a/test/libs/crypto/ECDSA512.test.ts b/test/libs/crypto/ECDSA512.test.ts
index 5b235a5d..240a596c 100644
--- a/test/libs/crypto/ECDSA512.test.ts
+++ b/test/libs/crypto/ECDSA512.test.ts
@@ -19,7 +19,7 @@ describe("ECDSA512", () => {
 
   afterEach(reverter.revert);
 
-  describe.only("brainpoolP512r1", () => {
+  describe("brainpoolP512r1", () => {
     const signature =
       "0x0bd2593447cc6c02caf99d60418dd42e9a194c910e6755ed0c7059acac656b04ccfe1e8348462ee43066823aee2fed7ca012e9890dfb69866d7ae88b6506f9c744b42304e693796618d090dbcb2a2551c3cb78534611e61fd9d1a5c0938b5b8ec6ed53d2d28999eabbd8e7792d167fcf582492403a6a0f7cc94c73a28fb76b71";
     const pubKey =

From 82a88c0b89a6dd11203645d4e2397dc4ca3fc07e Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Fri, 24 Jan 2025 16:26:20 +0200
Subject: [PATCH 27/42] add operator overloading

---
 contracts/libs/bn/U512.sol          | 21 ++++++++++++++-
 contracts/mock/libs/bn/U512Mock.sol | 40 +++++++++++++++++++++++++++++
 test/libs/bn/U512.test.ts           |  6 +++++
 3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 3af41808..ea5386fb 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-pragma solidity ^0.8.4;
+pragma solidity ^0.8.19;
 
 type uint512 is uint256;
 type call is uint256;
@@ -1183,3 +1183,22 @@ library U512 {
         }
     }
 }
+
+// Operator overloading pure functions
+function add(uint512 a_, uint512 b_) pure returns (uint512 r_) {
+    return U512.add(a_, b_);
+}
+
+function sub(uint512 a_, uint512 b_) pure returns (uint512 r_) {
+    return U512.sub(a_, b_);
+}
+
+function mul(uint512 a_, uint512 b_) pure returns (uint512 r_) {
+    return U512.mul(a_, b_);
+}
+
+function eq(uint512 a_, uint512 b_) pure returns (bool eq_) {
+    return U512.eq(a_, b_);
+}
+
+using {add as +, sub as -, mul as *, eq as ==} for uint512 global;
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 9111b415..87500416 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -38,6 +38,16 @@ contract U512Mock {
         return U512.eq(a_, b_);
     }
 
+    function eqOperator(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bool eq_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return a_ == b_;
+    }
+
     function eqUint256(bytes memory aBytes_, uint256 u256_) external view returns (bool eq_) {
         uint512 a_ = U512.fromBytes(aBytes_);
 
@@ -145,6 +155,16 @@ contract U512Mock {
         return U512.add(a_, b_).toBytes();
     }
 
+    function addOperator(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return (a_ + b_).toBytes();
+    }
+
     function addAssign(
         bytes memory aBytes_,
         bytes memory bBytes_
@@ -181,6 +201,16 @@ contract U512Mock {
         return U512.sub(a_, b_).toBytes();
     }
 
+    function subOperator(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return (a_ - b_).toBytes();
+    }
+
     function subAssign(
         bytes memory aBytes_,
         bytes memory bBytes_
@@ -217,6 +247,16 @@ contract U512Mock {
         return U512.mul(a_, b_).toBytes();
     }
 
+    function mulOperator(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return (a_ * b_).toBytes();
+    }
+
     function mulAssign(
         bytes memory aBytes_,
         bytes memory bBytes_
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index cd8ee952..37c94466 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -132,8 +132,11 @@ describe("U512", () => {
 
   it("eq test", async () => {
     expect(await u512.eq(toBytes(1020n), toBytes(1002n))).to.be.false;
+    expect(await u512.eqOperator(toBytes(1020n), toBytes(1002n))).to.be.false;
     expect(await u512.eq(toBytes(200n), toBytes(200n))).to.be.true;
+    expect(await u512.eqOperator(toBytes(200n), toBytes(200n))).to.be.true;
     expect(await u512.eq("0x00", "0x00")).to.be.true;
+    expect(await u512.eqOperator("0x00", "0x00")).to.be.true;
   });
 
   it("eqUint256 test", async () => {
@@ -183,6 +186,7 @@ describe("U512", () => {
       const to = randomU512();
 
       expect(await u512.add(a, b)).to.be.equal(add(a, b));
+      expect(await u512.addOperator(a, b)).to.be.equal(add(a, b));
       expect(await u512.addAssign(a, b)).to.be.equal(add(a, b));
       expect(await u512.addAssignTo(a, b, to)).to.be.equal(add(a, b));
     }
@@ -195,6 +199,7 @@ describe("U512", () => {
       const to = randomU512();
 
       expect(await u512.sub(a, b)).to.be.equal(sub(a, b));
+      expect(await u512.subOperator(a, b)).to.be.equal(sub(a, b));
       expect(await u512.subAssign(a, b)).to.be.equal(sub(a, b));
       expect(await u512.subAssignTo(a, b, to)).to.be.equal(sub(a, b));
     }
@@ -207,6 +212,7 @@ describe("U512", () => {
       const to = randomU512();
 
       expect(await u512.mul(a, b)).to.be.equal(mul(a, b));
+      expect(await u512.mulOperator(a, b)).to.be.equal(mul(a, b));
       expect(await u512.mulAssign(a, b)).to.be.equal(mul(a, b));
       expect(await u512.mulAssignTo(a, b, to)).to.be.equal(mul(a, b));
     }

From 936c96676503049d3129c060cb8e00fecef399a4 Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Fri, 24 Jan 2025 18:26:55 +0200
Subject: [PATCH 28/42] modify moddiv test

---
 test/libs/bn/U512.test.ts | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index 37c94466..6502d9d0 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -108,6 +108,17 @@ describe("U512", () => {
     return toBytes((((aBn - bBn) % mBn) + mBn) % mBn);
   }
 
+  function moddiv(a: string, b: string, m: string) {
+    const aBigInt = ethers.toBigInt(a);
+    const mBigInt = ethers.toBigInt(m);
+
+    const bInv = modinv(b, m);
+
+    const result = (aBigInt * ethers.toBigInt(bInv)) % mBigInt;
+
+    return toBytes(result);
+  }
+
   before(async () => {
     const U512Mock = await ethers.getContractFactory("U512Mock");
 
@@ -303,15 +314,14 @@ describe("U512", () => {
   it("moddiv test", async () => {
     const m = toBytes(prime);
 
-    const a = toBytes(779149564533142355434093157610126726613246737199n);
-    const b = toBytes(29118654464229156312755475164902924590603964377702716942232927993582928167089n);
-
-    const to = randomU512();
-
-    const expected = toBytes(30823410400962253491978005949535646087432096635784775122170630924100507445065n);
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
 
-    expect(await u512.moddiv(a, b, m)).to.equal(expected);
-    expect(await u512.moddivAssign(a, b, m)).to.equal(expected);
-    expect(await u512.moddivAssignTo(a, b, m, to)).to.equal(expected);
+      expect(await u512.moddiv(a, b, m)).to.be.equal(moddiv(a, b, m));
+      expect(await u512.moddivAssign(a, b, m)).to.be.equal(moddiv(a, b, m));
+      expect(await u512.moddivAssignTo(a, b, m, to)).to.be.equal(moddiv(a, b, m));
+    }
   });
 });

From 5b188afca4ef6bba2808e37b491e4bd4e02d3042 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Sun, 26 Jan 2025 14:03:06 +0200
Subject: [PATCH 29/42] rm ops and fixed tests

---
 contracts/libs/bn/U512.sol          | 21 +--------------
 contracts/mock/libs/bn/U512Mock.sol | 40 -----------------------------
 test/libs/bn/U512.test.ts           |  8 +-----
 test/libs/crypto/ECDSA384.test.ts   | 12 +++------
 test/libs/crypto/ECDSA512.test.ts   |  2 +-
 5 files changed, 6 insertions(+), 77 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index ea5386fb..3af41808 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-pragma solidity ^0.8.19;
+pragma solidity ^0.8.4;
 
 type uint512 is uint256;
 type call is uint256;
@@ -1183,22 +1183,3 @@ library U512 {
         }
     }
 }
-
-// Operator overloading pure functions
-function add(uint512 a_, uint512 b_) pure returns (uint512 r_) {
-    return U512.add(a_, b_);
-}
-
-function sub(uint512 a_, uint512 b_) pure returns (uint512 r_) {
-    return U512.sub(a_, b_);
-}
-
-function mul(uint512 a_, uint512 b_) pure returns (uint512 r_) {
-    return U512.mul(a_, b_);
-}
-
-function eq(uint512 a_, uint512 b_) pure returns (bool eq_) {
-    return U512.eq(a_, b_);
-}
-
-using {add as +, sub as -, mul as *, eq as ==} for uint512 global;
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 87500416..9111b415 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -38,16 +38,6 @@ contract U512Mock {
         return U512.eq(a_, b_);
     }
 
-    function eqOperator(
-        bytes memory aBytes_,
-        bytes memory bBytes_
-    ) external view returns (bool eq_) {
-        uint512 a_ = U512.fromBytes(aBytes_);
-        uint512 b_ = U512.fromBytes(bBytes_);
-
-        return a_ == b_;
-    }
-
     function eqUint256(bytes memory aBytes_, uint256 u256_) external view returns (bool eq_) {
         uint512 a_ = U512.fromBytes(aBytes_);
 
@@ -155,16 +145,6 @@ contract U512Mock {
         return U512.add(a_, b_).toBytes();
     }
 
-    function addOperator(
-        bytes memory aBytes_,
-        bytes memory bBytes_
-    ) external view returns (bytes memory rBytes_) {
-        uint512 a_ = U512.fromBytes(aBytes_);
-        uint512 b_ = U512.fromBytes(bBytes_);
-
-        return (a_ + b_).toBytes();
-    }
-
     function addAssign(
         bytes memory aBytes_,
         bytes memory bBytes_
@@ -201,16 +181,6 @@ contract U512Mock {
         return U512.sub(a_, b_).toBytes();
     }
 
-    function subOperator(
-        bytes memory aBytes_,
-        bytes memory bBytes_
-    ) external view returns (bytes memory rBytes_) {
-        uint512 a_ = U512.fromBytes(aBytes_);
-        uint512 b_ = U512.fromBytes(bBytes_);
-
-        return (a_ - b_).toBytes();
-    }
-
     function subAssign(
         bytes memory aBytes_,
         bytes memory bBytes_
@@ -247,16 +217,6 @@ contract U512Mock {
         return U512.mul(a_, b_).toBytes();
     }
 
-    function mulOperator(
-        bytes memory aBytes_,
-        bytes memory bBytes_
-    ) external view returns (bytes memory rBytes_) {
-        uint512 a_ = U512.fromBytes(aBytes_);
-        uint512 b_ = U512.fromBytes(bBytes_);
-
-        return (a_ * b_).toBytes();
-    }
-
     function mulAssign(
         bytes memory aBytes_,
         bytes memory bBytes_
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index 6502d9d0..cd69bd43 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe("U512", () => {
+describe.only("U512", () => {
   const reverter = new Reverter();
 
   const prime = 76884956397045344220809746629001649092737531784414529538755519063063536359079n;
@@ -143,11 +143,8 @@ describe("U512", () => {
 
   it("eq test", async () => {
     expect(await u512.eq(toBytes(1020n), toBytes(1002n))).to.be.false;
-    expect(await u512.eqOperator(toBytes(1020n), toBytes(1002n))).to.be.false;
     expect(await u512.eq(toBytes(200n), toBytes(200n))).to.be.true;
-    expect(await u512.eqOperator(toBytes(200n), toBytes(200n))).to.be.true;
     expect(await u512.eq("0x00", "0x00")).to.be.true;
-    expect(await u512.eqOperator("0x00", "0x00")).to.be.true;
   });
 
   it("eqUint256 test", async () => {
@@ -197,7 +194,6 @@ describe("U512", () => {
       const to = randomU512();
 
       expect(await u512.add(a, b)).to.be.equal(add(a, b));
-      expect(await u512.addOperator(a, b)).to.be.equal(add(a, b));
       expect(await u512.addAssign(a, b)).to.be.equal(add(a, b));
       expect(await u512.addAssignTo(a, b, to)).to.be.equal(add(a, b));
     }
@@ -210,7 +206,6 @@ describe("U512", () => {
       const to = randomU512();
 
       expect(await u512.sub(a, b)).to.be.equal(sub(a, b));
-      expect(await u512.subOperator(a, b)).to.be.equal(sub(a, b));
       expect(await u512.subAssign(a, b)).to.be.equal(sub(a, b));
       expect(await u512.subAssignTo(a, b, to)).to.be.equal(sub(a, b));
     }
@@ -223,7 +218,6 @@ describe("U512", () => {
       const to = randomU512();
 
       expect(await u512.mul(a, b)).to.be.equal(mul(a, b));
-      expect(await u512.mulOperator(a, b)).to.be.equal(mul(a, b));
       expect(await u512.mulAssign(a, b)).to.be.equal(mul(a, b));
       expect(await u512.mulAssignTo(a, b, to)).to.be.equal(mul(a, b));
     }
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index de29aaac..56f3c35e 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -30,7 +30,7 @@ function modifyRight(value: string, modifier: string): string {
   return newSignature;
 }
 
-describe("ECDSA384", () => {
+describe.only("ECDSA384", () => {
   const reverter = new Reverter();
 
   let ecdsa384: ECDSA384Mock;
@@ -104,22 +104,16 @@ describe("ECDSA384", () => {
         expect(await ecdsa384.verifySECP384r1(message, modifiedSig, pubKey)).to.be.false;
       });
 
-      it("should revert if curve parameters have an invalid length", async () => {
-        await expect(
-          ecdsa384.verifySECP384r1CustomCurveParameters(message, signature, pubKey, "0x", "0x"),
-        ).to.be.revertedWith("U384: not 384");
-      });
-
       it("should revert if signature or public key has an invalid length", async () => {
         const wrongSig =
           "0x3066023100a2fcd465ab5b507fc55941c1c6cd8286de04b83c94c6be25b5bdf58e27d86c3759d5f94ffcbd009618b6371bc51994f0023100d708d5045caa4a61cad42622c14bfb3343a5a9dc8fdbd19ce46b9e24c2aff84ba5114bb543fc4b0099f369079302b721";
 
-        await expect(ecdsa384.verifySECP384r1(message, wrongSig, pubKey)).to.be.revertedWith("U384: not 768");
+        await expect(ecdsa384.verifySECP384r1(message, wrongSig, pubKey)).to.be.reverted;
 
         const wrongPubKey =
           "0x3076301006072a8648ce3d020106052b81040022036200041d77728fada41a8a7a23fe922e4e2dc8881a94b72a0612077ad80eeef13ff3bbea92aeef36a0f65885417aea104b86b76aedc226e260f7d0eeea8405b9269f354d929e5a98cab64fe192db94ed9335b7395e38e99b8bfaf32effa163a92889f9";
 
-        await expect(ecdsa384.verifySECP384r1(message, signature, wrongPubKey)).to.be.revertedWith("U384: not 768");
+        await expect(ecdsa384.verifySECP384r1(message, signature, wrongPubKey)).to.be.reverted;
       });
 
       it("should not revert when message is hashed using SHA-384", async () => {
diff --git a/test/libs/crypto/ECDSA512.test.ts b/test/libs/crypto/ECDSA512.test.ts
index 240a596c..7a8facf8 100644
--- a/test/libs/crypto/ECDSA512.test.ts
+++ b/test/libs/crypto/ECDSA512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { ECDSA512Mock } from "@ethers-v6";
 
-describe("ECDSA512", () => {
+describe.only("ECDSA512", () => {
   const reverter = new Reverter();
 
   let ecdsa512: ECDSA512Mock;

From cfc730b7514f8a8e9a7103796a80b263ebdbb6af Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Sun, 26 Jan 2025 15:29:06 +0200
Subject: [PATCH 30/42] Add assign, call-allocating overloads, and bitwise ops

---
 contracts/libs/bn/U512.sol          | 507 +++++++++++++++++++++++++++-
 contracts/mock/libs/bn/U512Mock.sol | 319 +++++++++++++++++
 test/libs/bn/U512.test.ts           | 122 ++++++-
 test/libs/crypto/ECDSA384.test.ts   |   2 +-
 test/libs/crypto/ECDSA512.test.ts   |   2 +-
 5 files changed, 936 insertions(+), 16 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 3af41808..a0d95c4d 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -19,6 +19,10 @@ library U512 {
     function initCall() internal pure returns (call call_) {
         unchecked {
             call_ = call.wrap(_allocate(_CALL_ALLOCATION));
+
+            assembly {
+                call_ := add(call_, 0x40)
+            }
         }
     }
 
@@ -85,6 +89,20 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Assigns a 512-bit unsigned integer to another.
+     * @param u512_ The 512-bit unsigned integer to assign.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function assign(uint512 u512_, uint512 to_) internal pure {
+        unchecked {
+            assembly {
+                mstore(to_, mload(u512_))
+                mstore(add(to_, 0x20), mload(add(u512_, 0x20)))
+            }
+        }
+    }
+
     /**
      * @notice Converts a 512-bit unsigned integer to a byte array.
      * @param u512_ The 512-bit unsigned integer to convert.
@@ -204,6 +222,22 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular arithmetic on 512-bit integers.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The dividend.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular operation `(a_ % m_)`.
+     */
+    function mod(uint512 a_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _mod(call_, a_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular assignment on a 512-bit unsigned integer.
      * @dev Updates the value of `a_` to `(a_ % m_)`.
@@ -247,6 +281,23 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Computes the modular inverse of a 512-bit unsigned integer.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The 512-bit unsigned integer to invert.
+     * @param m_ The modulus.
+     * @return r_ The modular inverse result `a_^(-1) % m_`.
+     */
+    function modinv(uint512 a_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _modinv(call_, a_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs the modular inverse assignment on a 512-bit unsigned integer.
      * @dev Warning: The modulus `m_` must be a prime number
@@ -297,6 +348,23 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation on 512-bit unsigned integers.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     * @return r_ The result of modular exponentiation `(b_^e_) % m_`.
+     */
+    function modexp(uint512 b_, uint512 e_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _modexp(call_, b_, e_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular exponentiation assignment on the base.
      * @dev Updates the value of `b_` to `(b_^e_) % m_`.
@@ -353,6 +421,23 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Adds two 512-bit unsigned integers under a modulus.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The first addend.
+     * @param b_ The second addend.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular addition `(a_ + b_) % m_`.
+     */
+    function modadd(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _modadd(call_, a_, b_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular addition assignment on the first 512-bit unsigned integer addend.
      * @dev Updates the value of `a_` to `(a_ + b_) % m_`.
@@ -449,6 +534,26 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Adds two 512-bit unsigned integers under a modulus.
+     * @dev This is an optimized version of `modadd` where the inputs must be pre-reduced by `m_`.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The first addend, reduced by `m_`.
+     * @param b_ The second addend, reduced by `m_`.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular addition `(a_ + b_) % m_`.
+     */
+    function redadd(uint512 a_, uint512 b_, uint512 m_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            // `redadd` doesn't make calls, so it only requires 2 words for the buffer.
+            call call_ = call.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _redadd(call_, a_, b_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular addition assignment on the first 512-bit unsigned integer addend.
      * @dev This is an optimized version of `modaddAssign` where the inputs must be pre-reduced by `m_`.
@@ -505,6 +610,23 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Subtracts one 512-bit unsigned integer from another under a modulus.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The minuend.
+     * @param b_ The subtrahend.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular subtraction `(a_ - b_) % m_`.
+     */
+    function modsub(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _modsub(call_, a_, b_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular subtraction assignment on the 512-bit unsigned integer minuend.
      * @param call_ A memory pointer for precompile call arguments.
@@ -599,6 +721,26 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Subtracts one 512-bit unsigned integer from another under a modulus.
+     * @dev This is an optimized version of `modsub` where the inputs must be pre-reduced by `m_`.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The minuend, reduced by `m_`.
+     * @param b_ The subtrahend, reduced by `m_`.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular subtraction `(a_ - b_) % m_`.
+     */
+    function redsub(uint512 a_, uint512 b_, uint512 m_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            // `redsub` doesn't make calls, so it only requires 2 words for the buffer.
+            call call_ = call.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _redsub(call_, a_, b_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular subtraction assignment on the 512-bit unsigned integer minuend.
      * @dev This is an optimized version of `modsubAssign` where the inputs must be pre-reduced by `m_`.
@@ -655,6 +797,23 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Multiplies two 512-bit unsigned integers under a modulus.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The first factor.
+     * @param b_ The second factor.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular multiplication `(a_ * b_) % m_`.
+     */
+    function modmul(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _modmul(call_, a_, b_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs modular multiplication assignment on the first 512-bit unsigned integer factor.
      * @dev Updates the value of `a_` to `(a_ * b_) % m_`.
@@ -752,6 +911,25 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Divides two 512-bit unsigned integers under a modulus.
+     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev Returns the result of `(a_ * b_^(-1)) % m_`.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param a_ The dividend.
+     * @param b_ The divisor.
+     * @param m_ The modulus.
+     * @return r_ The result of the modular division.
+     */
+    function moddiv(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _moddiv(call_, a_, b_, m_, r_);
+        }
+    }
+
     /**
      * @notice Performs the modular division assignment on a 512-bit unsigned dividend.
      * @dev Warning: The modulus `m_` must be a prime number.
@@ -789,6 +967,237 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs bitwise AND of two 512-bit unsigned integers.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @return r_ The result of the bitwise AND operation.
+     */
+    function and(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _and(a_, b_, r_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise AND assignment on the first 512-bit unsigned integer.
+     * @dev Updates the value of `a_` to `a_ & b_`.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     */
+    function andAssign(uint512 a_, uint512 b_) internal pure {
+        unchecked {
+            _and(a_, b_, a_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise AND and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `a_ & b_` to `to_`.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function andAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
+        unchecked {
+            _and(a_, b_, to_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise OR of two 512-bit unsigned integers.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @return r_ The result of the bitwise OR operation.
+     */
+    function or(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _or(a_, b_, r_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise OR assignment on the first 512-bit unsigned integer.
+     * @dev Updates the value of `a_` to `a_ | b_`.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     */
+    function orAssign(uint512 a_, uint512 b_) internal pure {
+        unchecked {
+            _or(a_, b_, a_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise OR and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `a_ | b_` to `to_`.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function orAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
+        unchecked {
+            _or(a_, b_, to_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise XOR of two 512-bit unsigned integers.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @return r_ The result of the bitwise XOR operation.
+     */
+    function xor(uint512 a_, uint512 b_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _xor(a_, b_, r_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise XOR assignment on the first 512-bit unsigned integer.
+     * @dev Updates the value of `a_` to `a_ ^ b_`.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     */
+    function xorAssign(uint512 a_, uint512 b_) internal pure {
+        unchecked {
+            _xor(a_, b_, a_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise XOR and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `a_ ^ b_` to `to_`.
+     * @param a_ The first 512-bit unsigned integer.
+     * @param b_ The second 512-bit unsigned integer.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function xorAssignTo(uint512 a_, uint512 b_, uint512 to_) internal pure {
+        unchecked {
+            _xor(a_, b_, to_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise NOT of a 512-bit unsigned integer.
+     * @param a_ The 512-bit unsigned integer.
+     * @return r_ The result of the bitwise NOT operation.
+     */
+    function not(uint512 a_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _not(a_, r_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise NOT assignment on a 512-bit unsigned integer.
+     * @dev Updates the value of `a_` to `~a_`.
+     * @param a_ The 512-bit unsigned integer.
+     */
+    function notAssign(uint512 a_) internal pure {
+        unchecked {
+            _not(a_, a_);
+        }
+    }
+
+    /**
+     * @notice Performs bitwise NOT and stores the result in a separate 512-bit unsigned integer.
+     * @dev Assigns the result of `~a_` to `to_`.
+     * @param a_ The 512-bit unsigned integer.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function notAssignTo(uint512 a_, uint512 to_) internal pure {
+        unchecked {
+            _not(a_, to_);
+        }
+    }
+
+    /**
+     * @notice Shifts a 512-bit unsigned integer to the left by a specified number of bits.
+     * @param a_ The 512-bit unsigned integer to shift.
+     * @param b_ The number of bits to shift by.
+     * @return r_ The result of the left shift operation.
+     */
+    function shl(uint512 a_, uint8 b_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _shl(a_, b_, r_);
+        }
+    }
+
+    /**
+     * @notice Shifts a 512-bit unsigned integer to the left by a specified number of bits.
+     * @dev Updates the value of `a_` to `a_ << b_`.
+     * @param a_ The 512-bit unsigned integer to shift.
+     * @param b_ The number of bits to shift by.
+     */
+    function shlAssign(uint512 a_, uint8 b_) internal pure {
+        unchecked {
+            _shl(a_, b_, a_);
+        }
+    }
+
+    /**
+     * @notice Shifts a 512-bit unsigned integer to the left by a specified number of bits.
+     * @dev Assigns the result of `a_ << b_` to `to_`.
+     * @param a_ The 512-bit unsigned integer to shift.
+     * @param b_ The number of bits to shift by.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function shlAssignTo(uint512 a_, uint8 b_, uint512 to_) internal pure {
+        unchecked {
+            _shl(a_, b_, to_);
+        }
+    }
+
+    /**
+     * @notice Shifts a 512-bit unsigned integer to the right by a specified number of bits.
+     * @param a_ The 512-bit unsigned integer to shift.
+     * @param b_ The number of bits to shift by.
+     * @return r_ The result of the right shift operation.
+     */
+    function shr(uint512 a_, uint8 b_) internal pure returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _shr(a_, b_, r_);
+        }
+    }
+
+    /**
+     * @notice Shifts a 512-bit unsigned integer to the right by a specified number of bits.
+     * @dev Updates the value of `a_` to `a_ >> b_`.
+     * @param a_ The 512-bit unsigned integer to shift.
+     * @param b_ The number of bits to shift by.
+     */
+    function shrAssign(uint512 a_, uint8 b_) internal pure {
+        unchecked {
+            _shr(a_, b_, a_);
+        }
+    }
+
+    /**
+     * @notice Shifts a 512-bit unsigned integer to the right by a specified number of bits.
+     * @dev Assigns the result of `a_ >> b_` to `to_`.
+     * @param a_ The 512-bit unsigned integer to shift.
+     * @param b_ The number of bits to shift by.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function shrAssignTo(uint512 a_, uint8 b_, uint512 to_) internal pure {
+        unchecked {
+            _shr(a_, b_, to_);
+        }
+    }
+
     /**
      * @notice Performs modular arithmetic using the EVM precompiled contract.
      * @dev Computes `(a_ % m_)` and stores the result in `r_`.
@@ -941,10 +1350,7 @@ library U512 {
                 return;
             }
 
-            assembly {
-                mstore(r_, mload(buffer_))
-                mstore(add(r_, 0x20), mload(add(buffer_, 0x20)))
-            }
+            assign(buffer_, r_);
         }
     }
 
@@ -1149,24 +1555,105 @@ library U512 {
 
             assembly {
                 mstore(call_, 0x80)
-                mstore(add(0x20, call_), 0x20)
-                mstore(add(0x40, call_), 0x40)
-                mstore(add(0xE0, call_), 0x01)
-                mstore(add(0x0100, call_), mload(m_))
-                mstore(add(0x0120, call_), mload(add(m_, 0x20)))
+                mstore(add(call_, 0x20), 0x20)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0xE0), 0x01)
+                mstore(add(call_, 0x0100), mload(m_))
+                mstore(add(call_, 0x0120), mload(add(m_, 0x20)))
 
                 pop(staticcall(gas(), 0x5, call_, 0x0140, r_, 0x40))
             }
         }
     }
 
+    /**
+     * @notice Performs bitwise AND of two 512-bit unsigned integers.
+     * @dev Computes `a_ & b_` and stores the result in `r_`.
+     */
+    function _and(uint512 a_, uint512 b_, uint512 r_) internal pure {
+        unchecked {
+            assembly {
+                mstore(r_, and(mload(a_), mload(b_)))
+                mstore(add(r_, 0x20), and(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
+            }
+        }
+    }
+
+    /**
+     * @notice Performs bitwise OR of two 512-bit unsigned integers.
+     * @dev Computes `a_ | b_` and stores the result in `r_`.
+     */
+    function _or(uint512 a_, uint512 b_, uint512 r_) internal pure {
+        unchecked {
+            assembly {
+                mstore(r_, or(mload(a_), mload(b_)))
+                mstore(add(r_, 0x20), or(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
+            }
+        }
+    }
+
+    /**
+     * @notice Performs bitwise XOR of two 512-bit unsigned integers.
+     * @dev Computes `a_ ^ b_` and stores the result in `r_`.
+     */
+    function _xor(uint512 a_, uint512 b_, uint512 r_) internal pure {
+        unchecked {
+            assembly {
+                mstore(r_, xor(mload(a_), mload(b_)))
+                mstore(add(r_, 0x20), xor(mload(add(a_, 0x20)), mload(add(b_, 0x20))))
+            }
+        }
+    }
+
+    /**
+     * @notice Performs bitwise NOT of a 512-bit unsigned integer.
+     * @dev Computes `~a_` and stores the result in `r_`.
+     */
+    function _not(uint512 a_, uint512 r_) internal pure {
+        unchecked {
+            assembly {
+                mstore(r_, not(mload(a_)))
+                mstore(add(r_, 0x20), not(mload(add(a_, 0x20))))
+            }
+        }
+    }
+
+    /**
+     * @notice Performs left shift of a 512-bit unsigned integer.
+     * @dev Computes `a_ << b_` and stores the result in `r_`.
+     */
+    function _shl(uint512 a_, uint8 b_, uint512 r_) internal pure {
+        unchecked {
+            assembly {
+                mstore(r_, or(shl(b_, mload(a_)), shr(sub(256, b_), mload(add(a_, 0x20)))))
+                mstore(add(r_, 0x20), shl(b_, mload(add(a_, 0x20))))
+            }
+        }
+    }
+
+    /**
+     * @notice Performs right shift of a 512-bit unsigned integer.
+     * @dev Computes `a_ >> b_` and stores the result in `r_`.
+     */
+    function _shr(uint512 a_, uint8 b_, uint512 r_) internal pure {
+        unchecked {
+            assembly {
+                mstore(
+                    add(r_, 0x20),
+                    or(shr(b_, mload(add(a_, 0x20))), shl(sub(256, b_), mload(a_)))
+                )
+                mstore(r_, shr(b_, mload(a_)))
+            }
+        }
+    }
+
     /**
      * @notice Calculates a memory pointer for a buffer based on the provided `call_` pointer.
      */
     function _buffer(call call_) private pure returns (uint512 buffer_) {
         unchecked {
             assembly {
-                buffer_ := add(call_, 0x0140)
+                buffer_ := sub(call_, 0x40)
             }
         }
     }
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 9111b415..bd807ab1 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -27,6 +27,27 @@ contract U512Mock {
         valueCopy_ = U512.toBytes(pointerCopy_);
     }
 
+    function assign(
+        uint256 u256_
+    )
+        external
+        pure
+        returns (
+            uint512 pointerOriginal_,
+            uint512 pointerAssign_,
+            bytes memory valueOriginal_,
+            bytes memory valueAssign_
+        )
+    {
+        pointerOriginal_ = U512.fromUint256(u256_);
+        valueOriginal_ = U512.toBytes(pointerOriginal_);
+
+        pointerAssign_ = U512.fromUint256(0);
+
+        U512.assign(pointerOriginal_, pointerAssign_);
+        valueAssign_ = U512.toBytes(pointerAssign_);
+    }
+
     function isNull(uint512 pointer_) external pure returns (bool isNull_) {
         return U512.isNull(pointer_);
     }
@@ -63,6 +84,16 @@ contract U512Mock {
         return U512.mod(call_, a_, m_).toBytes();
     }
 
+    function modAlloc(
+        bytes memory aBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.mod(a_, m_).toBytes();
+    }
+
     function modAssign(
         bytes memory aBytes_,
         bytes memory mBytes_
@@ -105,6 +136,16 @@ contract U512Mock {
         return U512.modinv(call_, a_, m_).toBytes();
     }
 
+    function modinvAlloc(
+        bytes memory aBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modinv(a_, m_).toBytes();
+    }
+
     function modinvAssign(
         bytes memory aBytes_,
         bytes memory mBytes_
@@ -257,6 +298,18 @@ contract U512Mock {
         return U512.modadd(call_, a_, b_, m_).toBytes();
     }
 
+    function modaddAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modadd(a_, b_, m_).toBytes();
+    }
+
     function modaddAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -305,6 +358,18 @@ contract U512Mock {
         return U512.redadd(call_, a_, b_, m_).toBytes();
     }
 
+    function redaddAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.redadd(a_, b_, m_).toBytes();
+    }
+
     function redaddAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -353,6 +418,18 @@ contract U512Mock {
         return U512.modsub(call_, a_, b_, m_).toBytes();
     }
 
+    function modsubAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modsub(a_, b_, m_).toBytes();
+    }
+
     function modsubAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -401,6 +478,18 @@ contract U512Mock {
         return U512.redsub(call_, a_, b_, m_).toBytes();
     }
 
+    function redsubAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.redsub(a_, b_, m_).toBytes();
+    }
+
     function redsubAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -449,6 +538,18 @@ contract U512Mock {
         return U512.modmul(call_, a_, b_, m_).toBytes();
     }
 
+    function modmulAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modmul(a_, b_, m_).toBytes();
+    }
+
     function modmulAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -497,6 +598,18 @@ contract U512Mock {
         return U512.modexp(call_, a_, b_, m_).toBytes();
     }
 
+    function modexpAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modexp(a_, b_, m_).toBytes();
+    }
+
     function modexpAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -545,6 +658,18 @@ contract U512Mock {
         return U512.moddiv(call_, a_, b_, m_).toBytes();
     }
 
+    function moddivAlloc(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.moddiv(a_, b_, m_).toBytes();
+    }
+
     function moddivAssign(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -578,4 +703,198 @@ contract U512Mock {
 
         return to_.toBytes();
     }
+
+    function and(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.and(a_, b_).toBytes();
+    }
+
+    function andAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        U512.andAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function andAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.andAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
+    function or(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.or(a_, b_).toBytes();
+    }
+
+    function orAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        U512.orAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function orAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.orAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
+    function xor(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        return U512.xor(a_, b_).toBytes();
+    }
+
+    function xorAssign(
+        bytes memory aBytes_,
+        bytes memory bBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+
+        U512.xorAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function xorAssignTo(
+        bytes memory aBytes_,
+        bytes memory bBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 b_ = U512.fromBytes(bBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.xorAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
+    function not(bytes memory aBytes_) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        return U512.not(a_).toBytes();
+    }
+
+    function notAssign(bytes memory aBytes_) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        U512.notAssign(a_);
+
+        return a_.toBytes();
+    }
+
+    function notAssignTo(
+        bytes memory aBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.notAssignTo(a_, to_);
+
+        return to_.toBytes();
+    }
+
+    function shl(bytes memory aBytes_, uint8 b_) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        return U512.shl(a_, b_).toBytes();
+    }
+
+    function shlAssign(
+        bytes memory aBytes_,
+        uint8 b_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        U512.shlAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function shlAssignTo(
+        bytes memory aBytes_,
+        uint8 b_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.shlAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
+
+    function shr(bytes memory aBytes_, uint8 b_) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        return U512.shr(a_, b_).toBytes();
+    }
+
+    function shrAssign(
+        bytes memory aBytes_,
+        uint8 b_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+
+        U512.shrAssign(a_, b_);
+
+        return a_.toBytes();
+    }
+
+    function shrAssignTo(
+        bytes memory aBytes_,
+        uint8 b_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.shrAssignTo(a_, b_, to_);
+
+        return to_.toBytes();
+    }
 }
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index cd69bd43..4ce3ac43 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -101,11 +101,11 @@ describe.only("U512", () => {
   }
 
   function modsub(a: string, b: string, m: string): string {
-    const aBn = ethers.toBigInt(a);
-    const bBn = ethers.toBigInt(b);
-    const mBn = ethers.toBigInt(m);
+    const aBigInt = ethers.toBigInt(a);
+    const bBigInt = ethers.toBigInt(b);
+    const mBigInt = ethers.toBigInt(m);
 
-    return toBytes((((aBn - bBn) % mBn) + mBn) % mBn);
+    return toBytes((((aBigInt - bBigInt) % mBigInt) + mBigInt) % mBigInt);
   }
 
   function moddiv(a: string, b: string, m: string) {
@@ -119,6 +119,35 @@ describe.only("U512", () => {
     return toBytes(result);
   }
 
+  function and(a: string, b: string): string {
+    return toBytes(ethers.toBigInt(a) & ethers.toBigInt(b));
+  }
+
+  function or(a: string, b: string): string {
+    return toBytes(ethers.toBigInt(a) | ethers.toBigInt(b));
+  }
+
+  function xor(a: string, b: string): string {
+    return toBytes(ethers.toBigInt(a) ^ ethers.toBigInt(b));
+  }
+
+  function not(a: string): string {
+    // ~a = -a - 1 (mod 2^512), i.e. (2^512 - 1) - a for a 512-bit unsigned value
+    const maxUint512 = (BigInt(1) << BigInt(512)) - 1n;
+
+    return sub(toBytes(maxUint512), a);
+  }
+
+  function shl(a: string, b: number): string {
+    const maxUint512 = BigInt(1) << BigInt(512);
+
+    return toBytes((ethers.toBigInt(a) << BigInt(b)) % maxUint512);
+  }
+
+  function shr(a: string, b: number): string {
+    return toBytes(ethers.toBigInt(a) >> BigInt(b));
+  }
+
   before(async () => {
     const U512Mock = await ethers.getContractFactory("U512Mock");
 
@@ -136,6 +165,13 @@ describe.only("U512", () => {
     expect(valueOriginal).to.be.equal(valueCopy);
   });
 
+  it("assign test", async () => {
+    const [pointerOriginal, pointerAssign, valueOriginal, valueAssign] = await u512.assign(prime);
+
+    expect(pointerOriginal).to.not.eq(pointerAssign);
+    expect(valueOriginal).to.be.equal(valueAssign);
+  });
+
   it("isNull test", async () => {
     expect(await u512.isNull(0)).to.be.true;
     expect(await u512.isNull(64)).to.be.false;
@@ -169,6 +205,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.mod(a, m)).to.be.equal(mod(a, m));
+      expect(await u512.modAlloc(a, m)).to.be.equal(mod(a, m));
       expect(await u512.modAssign(a, m)).to.be.equal(mod(a, m));
       expect(await u512.modAssignTo(a, m, to)).to.be.equal(mod(a, m));
     }
@@ -182,6 +219,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.modinv(a, m)).to.be.equal(modinv(a, m));
+      expect(await u512.modinvAlloc(a, m)).to.be.equal(modinv(a, m));
       expect(await u512.modinvAssign(a, m)).to.be.equal(modinv(a, m));
       expect(await u512.modinvAssignTo(a, m, to)).to.be.equal(modinv(a, m));
     }
@@ -231,6 +269,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.modadd(a, b, m)).to.equal(modadd(a, b, m));
+      expect(await u512.modaddAlloc(a, b, m)).to.equal(modadd(a, b, m));
       expect(await u512.modaddAssign(a, b, m)).to.equal(modadd(a, b, m));
       expect(await u512.modaddAssignTo(a, b, m, to)).to.equal(modadd(a, b, m));
     }
@@ -246,6 +285,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.redadd(a, b, m)).to.equal(modadd(a, b, m));
+      expect(await u512.redaddAlloc(a, b, m)).to.equal(modadd(a, b, m));
       expect(await u512.redaddAssign(a, b, m)).to.equal(modadd(a, b, m));
       expect(await u512.redaddAssignTo(a, b, m, to)).to.equal(modadd(a, b, m));
     }
@@ -259,6 +299,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.modmul(a, b, m)).to.equal(modmul(a, b, m));
+      expect(await u512.modmulAlloc(a, b, m)).to.equal(modmul(a, b, m));
       expect(await u512.modmulAssign(a, b, m)).to.equal(modmul(a, b, m));
       expect(await u512.modmulAssignTo(a, b, m, to)).to.equal(modmul(a, b, m));
     }
@@ -272,6 +313,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.modsub(a, b, m)).to.equal(modsub(a, b, m));
+      expect(await u512.modsubAlloc(a, b, m)).to.equal(modsub(a, b, m));
       expect(await u512.modsubAssign(a, b, m)).to.equal(modsub(a, b, m));
       expect(await u512.modsubAssignTo(a, b, m, to)).to.equal(modsub(a, b, m));
     }
@@ -287,6 +329,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.redsub(a, b, m)).to.equal(modsub(a, b, m));
+      expect(await u512.redsubAlloc(a, b, m)).to.equal(modsub(a, b, m));
       expect(await u512.redsubAssign(a, b, m)).to.equal(modsub(a, b, m));
       expect(await u512.redsubAssignTo(a, b, m, to)).to.equal(modsub(a, b, m));
     }
@@ -300,6 +343,7 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.modexp(a, b, m)).to.equal(modexp(a, b, m));
+      expect(await u512.modexpAlloc(a, b, m)).to.equal(modexp(a, b, m));
       expect(await u512.modexpAssign(a, b, m)).to.equal(modexp(a, b, m));
       expect(await u512.modexpAssignTo(a, b, m, to)).to.equal(modexp(a, b, m));
     }
@@ -314,8 +358,78 @@ describe.only("U512", () => {
       const to = randomU512();
 
       expect(await u512.moddiv(a, b, m)).to.be.equal(moddiv(a, b, m));
+      expect(await u512.moddivAlloc(a, b, m)).to.be.equal(moddiv(a, b, m));
       expect(await u512.moddivAssign(a, b, m)).to.be.equal(moddiv(a, b, m));
       expect(await u512.moddivAssignTo(a, b, m, to)).to.be.equal(moddiv(a, b, m));
     }
   });
+
+  it("and test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
+
+      expect(await u512.and(a, b)).to.be.equal(and(a, b));
+      expect(await u512.andAssign(a, b)).to.be.equal(and(a, b));
+      expect(await u512.andAssignTo(a, b, to)).to.be.equal(and(a, b));
+    }
+  });
+
+  it("or test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
+
+      expect(await u512.or(a, b)).to.be.equal(or(a, b));
+      expect(await u512.orAssign(a, b)).to.be.equal(or(a, b));
+      expect(await u512.orAssignTo(a, b, to)).to.be.equal(or(a, b));
+    }
+  });
+
+  it("xor test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = randomU512();
+      const to = randomU512();
+
+      expect(await u512.xor(a, b)).to.be.equal(xor(a, b));
+      expect(await u512.xorAssign(a, b)).to.be.equal(xor(a, b));
+      expect(await u512.xorAssignTo(a, b, to)).to.be.equal(xor(a, b));
+    }
+  });
+
+  it("not test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const to = randomU512();
+
+      expect(await u512.not(a)).to.be.equal(not(a));
+      expect(await u512.notAssign(a)).to.be.equal(not(a));
+      expect(await u512.notAssignTo(a, to)).to.be.equal(not(a));
+    }
+  });
+
+  it("shl test", async () => {
+    for (let b = 0; b < 256; ++b) {
+      const a = randomU512();
+      const to = randomU512();
+
+      expect(await u512.shl(a, b)).to.be.equal(shl(a, b));
+      expect(await u512.shlAssign(a, b)).to.be.equal(shl(a, b));
+      expect(await u512.shlAssignTo(a, b, to)).to.be.equal(shl(a, b));
+    }
+  });
+
+  it("shr test", async () => {
+    for (let b = 0; b < 256; ++b) {
+      const a = randomU512();
+      const to = randomU512();
+
+      expect(await u512.shr(a, b)).to.be.equal(shr(a, b));
+      expect(await u512.shrAssign(a, b)).to.be.equal(shr(a, b));
+      expect(await u512.shrAssignTo(a, b, to)).to.be.equal(shr(a, b));
+    }
+  });
 });
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index 56f3c35e..fb0cf5f1 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -30,7 +30,7 @@ function modifyRight(value: string, modifier: string): string {
   return newSignature;
 }
 
-describe.only("ECDSA384", () => {
+describe("ECDSA384", () => {
   const reverter = new Reverter();
 
   let ecdsa384: ECDSA384Mock;
diff --git a/test/libs/crypto/ECDSA512.test.ts b/test/libs/crypto/ECDSA512.test.ts
index 7a8facf8..240a596c 100644
--- a/test/libs/crypto/ECDSA512.test.ts
+++ b/test/libs/crypto/ECDSA512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { ECDSA512Mock } from "@ethers-v6";
 
-describe.only("ECDSA512", () => {
+describe("ECDSA512", () => {
   const reverter = new Reverter();
 
   let ecdsa512: ECDSA512Mock;

From c1c7cd8455fb57fe86895fd1fd3bf0a1811dab9b Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Sun, 26 Jan 2025 17:05:05 +0200
Subject: [PATCH 31/42] added modexpU256 & tested gas

---
 contracts/libs/bn/U512.sol          | 134 ++++++++++++++++++--
 contracts/libs/crypto/ECDSA384.sol  |  10 +-
 contracts/libs/crypto/ECDSA512.sol  |  10 +-
 contracts/mock/libs/bn/U512Mock.sol | 188 +++++++++++++++++++++++++---
 test/libs/bn/U512.test.ts           |  16 ++-
 test/libs/crypto/ECDSA384.test.ts   |   2 +-
 test/libs/crypto/ECDSA512.test.ts   |   2 +-
 7 files changed, 321 insertions(+), 41 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index a0d95c4d..a056c670 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -6,6 +6,28 @@ type call is uint256;
 
 /**
  * @notice Low-level library that implements unsigned 512-bit arithmetics.
+ *
+ * | Statistic   | Avg          |
+ * | ----------- | ------------ |
+ * | add         | 269 gas      |
+ * | sub         | 278 gas      |
+ * | mul         | 353 gas      |
+ * | mod         | 682 gas      |
+ * | modinv      | 6083 gas     |
+ * | modadd      | 780 gas      |
+ * | redadd      | 590 gas      |
+ * | modmul      | 1176 gas     |
+ * | modsub      | 1017 gas     |
+ * | redsub      | 533 gas      |
+ * | modexp      | 5981 gas     |
+ * | modexpU256  | 692 gas      |
+ * | moddiv      | 7092 gas     |
+ * | and         | 251 gas      |
+ * | or          | 251 gas      |
+ * | xor         | 251 gas      |
+ * | not         | 216 gas      |
+ * | shl         | 272 gas      |
+ * | shr         | 272 gas      |
  */
 library U512 {
     uint256 private constant _UINT512_ALLOCATION = 64;
@@ -267,7 +289,7 @@ library U512 {
 
     /**
      * @notice Computes the modular inverse of a 512-bit unsigned integer.
-     * @dev Warning: The modulus `m_` must be a prime number
+     * @dev IMPORTANT: The modulus `m_` must be a prime number
      * @param call_ A memory pointer for precompile call arguments.
      * @param a_ The 512-bit unsigned integer to invert.
      * @param m_ The modulus.
@@ -283,7 +305,7 @@ library U512 {
 
     /**
      * @notice Computes the modular inverse of a 512-bit unsigned integer.
-     * @dev Warning: The modulus `m_` must be a prime number
+     * @dev IMPORTANT: The modulus `m_` must be a prime number
      * @dev Allocates memory for `call` every time it's called.
      * @param a_ The 512-bit unsigned integer to invert.
      * @param m_ The modulus.
@@ -300,7 +322,7 @@ library U512 {
 
     /**
      * @notice Performs the modular inverse assignment on a 512-bit unsigned integer.
-     * @dev Warning: The modulus `m_` must be a prime number
+     * @dev IMPORTANT: The modulus `m_` must be a prime number
      * @dev Updates the value of `a_` to `a_^(-1) % m_`.
      * @param call_ A memory pointer for precompile call arguments.
      * @param a_ The 512-bit unsigned integer to invert.
@@ -314,7 +336,7 @@ library U512 {
 
     /**
      * @notice Computes the modular inverse and stores it in a separate 512-bit unsigned integer.
-     * @dev Warning: The modulus `m_` must be a prime number
+     * @dev IMPORTANT: The modulus `m_` must be a prime number
      * @dev Assigns the result of `a_^(-1) % m_` to `to_`.
      * @param call_ A memory pointer for precompile call arguments.
      * @param a_ The 512-bit unsigned integer to invert.
@@ -400,6 +422,79 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation of a 512-bit unsigned integer base and a 256-bit unsigned integer exponent.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     * @return r_ The result of modular exponentiation `(b_^e_) % m_`.
+     */
+    function modexpU256(
+        call call_,
+        uint512 b_,
+        uint256 e_,
+        uint512 m_
+    ) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+
+            _modexpU256(call_, b_, e_, m_, r_);
+        }
+    }
+
+    /**
+     * @notice Performs modular exponentiation of a 512-bit unsigned integer base and a 256-bit unsigned integer exponent.
+     * @dev Allocates memory for `call` every time it's called.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     * @return r_ The result of modular exponentiation `(b_^e_) % m_`.
+     */
+    function modexpU256(uint512 b_, uint256 e_, uint512 m_) internal view returns (uint512 r_) {
+        unchecked {
+            r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
+            call call_ = initCall();
+
+            _modexpU256(call_, b_, e_, m_, r_);
+        }
+    }
+
+    /**
+     * @notice Performs modular exponentiation of a 512-bit unsigned integer base and a 256-bit unsigned integer exponent.
+     * @dev Updates the value of `b_` to `(b_^e_) % m_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     */
+    function modexpU256Assign(call call_, uint512 b_, uint256 e_, uint512 m_) internal view {
+        unchecked {
+            _modexpU256(call_, b_, e_, m_, b_);
+        }
+    }
+
+    /**
+     * @notice Performs modular exponentiation of a 512-bit unsigned integer base and a 256-bit unsigned integer exponent.
+     * @dev Assigns the result of `(b_^e_) % m_` to `to_`.
+     * @param call_ A memory pointer for precompile call arguments.
+     * @param b_ The base.
+     * @param e_ The exponent.
+     * @param m_ The modulus.
+     * @param to_ The target 512-bit unsigned integer to store the result.
+     */
+    function modexpU256AssignTo(
+        call call_,
+        uint512 b_,
+        uint256 e_,
+        uint512 m_,
+        uint512 to_
+    ) internal view {
+        unchecked {
+            _modexpU256(call_, b_, e_, m_, to_);
+        }
+    }
+
     /**
      * @notice Adds two 512-bit unsigned integers under a modulus.
      * @param call_ A memory pointer for precompile call arguments.
@@ -890,7 +985,7 @@ library U512 {
 
     /**
      * @notice Divides two 512-bit unsigned integers under a modulus.
-     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev IMPORTANT: The modulus `m_` must be a prime number.
      * @dev Returns the result of `(a_ * b_^(-1)) % m_`.
      * @param call_ A memory pointer for precompile call arguments.
      * @param a_ The dividend.
@@ -913,7 +1008,7 @@ library U512 {
 
     /**
      * @notice Divides two 512-bit unsigned integers under a modulus.
-     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev IMPORTANT: The modulus `m_` must be a prime number.
      * @dev Returns the result of `(a_ * b_^(-1)) % m_`.
      * @dev Allocates memory for `call` every time it's called.
      * @param a_ The dividend.
@@ -932,7 +1027,7 @@ library U512 {
 
     /**
      * @notice Performs the modular division assignment on a 512-bit unsigned dividend.
-     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev IMPORTANT: The modulus `m_` must be a prime number.
      * @dev Updates the value of `a_` to `(a_ * b_^(-1)) % m_`.
      * @param call_ A memory pointer for precompile call arguments.
      * @param a_ The dividend.
@@ -947,7 +1042,7 @@ library U512 {
 
     /**
      * @notice Performs the modular division and stores the result in a separate 512-bit unsigned integer.
-     * @dev Warning: The modulus `m_` must be a prime number.
+     * @dev IMPORTANT: The modulus `m_` must be a prime number.
      * @dev Assigns the result of `(a_ * b_^(-1)) % m_` to `to_`.
      * @param call_ A memory pointer for precompile call arguments.
      * @param a_ The dividend.
@@ -1241,6 +1336,27 @@ library U512 {
         }
     }
 
+    /**
+     * @notice Performs modular exponentiation with a 256-bit exponent using the EVM precompiled contract.
+     * @dev Computes `(a_^e_) % m_` and stores the result in `r_`.
+     */
+    function _modexpU256(call call_, uint512 a_, uint256 e_, uint512 m_, uint512 r_) private view {
+        unchecked {
+            assembly {
+                mstore(call_, 0x40)
+                mstore(add(call_, 0x20), 0x20)
+                mstore(add(call_, 0x40), 0x40)
+                mstore(add(call_, 0x60), mload(a_))
+                mstore(add(call_, 0x80), mload(add(a_, 0x20)))
+                mstore(add(call_, 0xA0), e_)
+                mstore(add(call_, 0xC0), mload(m_))
+                mstore(add(call_, 0xE0), mload(add(m_, 0x20)))
+
+                pop(staticcall(gas(), 0x5, call_, 0x0100, r_, 0x40))
+            }
+        }
+    }
+
     /**
      * @notice Computes the modular inverse using the EVM precompiled contract.
      * @dev The modulus `m_` must be a prime number.
@@ -1428,6 +1544,7 @@ library U512 {
     /**
      * @notice Multiplies two 512-bit unsigned integers.
      * @dev Computes `a_ * b_` and stores the result in `r_`.
+     * @dev Generalizes the "muldiv" algorithm to split 512-bit unsigned integers into chunks, as detailed at https://xn--2-umb.com/21/muldiv/.
      */
     function _mul(uint512 a_, uint512 b_, uint512 r_) private pure {
         unchecked {
@@ -1457,6 +1574,7 @@ library U512 {
     /**
      * @notice Prepares intermediate results for modular multiplication.
      * @dev Calculates partial products and stores them in `call_` for further processing.
+     * @dev Generalizes the "muldiv" algorithm to split 512-bit unsigned integers into chunks, as detailed at https://xn--2-umb.com/21/muldiv/.
      */
     function _modmul2p(call call_, uint512 a_, uint512 b_) private pure {
         unchecked {
diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index 8e4e9992..b444392e 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -155,8 +155,8 @@ library ECDSA384 {
                 return false;
             }
 
-            uint512 lhs_ = U512.modexp(call_, y_, U512.fromUint256(2), p_);
-            uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
+            uint512 lhs_ = U512.modexpU256(call_, y_, 2, p_);
+            uint512 rhs_ = U512.modexpU256(call_, x_, 3, p_);
 
             if (!U512.eqUint256(a_, 0)) {
                 rhs_ = U512.redadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
@@ -282,14 +282,14 @@ library ECDSA384 {
                 return (x2_, y2_);
             }
 
-            uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
+            uint512 m1_ = U512.modexpU256(call_, x1_, 2, p_);
             U512.modmulAssign(call_, m1_, three_, p_);
             U512.redaddAssign(call_, m1_, a_, p_);
 
             uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
             U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x2_ = U512.modexp(call_, m1_, two_, p_);
+            x2_ = U512.modexpU256(call_, m1_, 2, p_);
             U512.redsubAssign(call_, x2_, x1_, p_);
             U512.redsubAssign(call_, x2_, x1_, p_);
 
@@ -338,7 +338,7 @@ library ECDSA384 {
 
             U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x3 = U512.modexp(call_, m1_, two_, p_);
+            x3 = U512.modexpU256(call_, m1_, 2, p_);
             U512.redsubAssign(call_, x3, x1_, p_);
             U512.redsubAssign(call_, x3, x2_, p_);
 
diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index ba6d2bc9..75ea4fc6 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -155,8 +155,8 @@ library ECDSA512 {
                 return false;
             }
 
-            uint512 lhs_ = U512.modexp(call_, y_, U512.fromUint256(2), p_);
-            uint512 rhs_ = U512.modexp(call_, x_, U512.fromUint256(3), p_);
+            uint512 lhs_ = U512.modexpU256(call_, y_, 2, p_);
+            uint512 rhs_ = U512.modexpU256(call_, x_, 3, p_);
 
             if (!U512.eqUint256(a_, 0)) {
                 rhs_ = U512.redadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
@@ -290,14 +290,14 @@ library ECDSA512 {
                 return (x2_, y2_);
             }
 
-            uint512 m1_ = U512.modexp(call_, x1_, two_, p_);
+            uint512 m1_ = U512.modexpU256(call_, x1_, 2, p_);
             U512.modmulAssign(call_, m1_, three_, p_);
             U512.redaddAssign(call_, m1_, a_, p_);
 
             uint512 m2_ = U512.modmul(call_, y1_, two_, p_);
             U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x2_ = U512.modexp(call_, m1_, two_, p_);
+            x2_ = U512.modexpU256(call_, m1_, 2, p_);
             U512.redsubAssign(call_, x2_, x1_, p_);
             U512.redsubAssign(call_, x2_, x1_, p_);
 
@@ -346,7 +346,7 @@ library ECDSA512 {
 
             U512.moddivAssign(call_, m1_, m2_, p_);
 
-            x3 = U512.modexp(call_, m1_, two_, p_);
+            x3 = U512.modexpU256(call_, m1_, 2, p_);
             U512.redsubAssign(call_, x3, x1_, p_);
             U512.redsubAssign(call_, x3, x2_, p_);
 
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index bd807ab1..45652635 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -4,6 +4,7 @@ pragma solidity ^0.8.4;
 import {call} from "../../../libs/bn/U512.sol";
 import {uint512} from "../../../libs/bn/U512.sol";
 import {U512} from "../../../libs/bn/U512.sol";
+// import "hardhat/console.sol";
 
 contract U512Mock {
     using U512 for *;
@@ -81,7 +82,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.mod(call_, a_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.mod(call_, a_, m_);
+
+        // console.log("mod gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function modAlloc(
@@ -133,7 +139,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.modinv(call_, a_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.modinv(call_, a_, m_);
+
+        // console.log("modinv gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function modinvAlloc(
@@ -183,7 +194,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
 
-        return U512.add(a_, b_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.add(a_, b_);
+
+        // console.log("add gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function addAssign(
@@ -219,7 +235,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
 
-        return U512.sub(a_, b_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.sub(a_, b_);
+
+        // console.log("sub gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function subAssign(
@@ -255,7 +276,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
 
-        return U512.mul(a_, b_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.mul(a_, b_);
+
+        // console.log("mul gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function mulAssign(
@@ -295,7 +321,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.modadd(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.modadd(call_, a_, b_, m_);
+
+        // console.log("modadd gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function modaddAlloc(
@@ -355,7 +386,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.redadd(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.redadd(call_, a_, b_, m_);
+
+        // console.log("redadd gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function redaddAlloc(
@@ -415,7 +451,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.modsub(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.modsub(call_, a_, b_, m_);
+
+        // console.log("modsub gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function modsubAlloc(
@@ -475,7 +516,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.redsub(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.redsub(call_, a_, b_, m_);
+
+        // console.log("redsub gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function redsubAlloc(
@@ -535,7 +581,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.modmul(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.modmul(call_, a_, b_, m_);
+
+        // console.log("modmul gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function modmulAlloc(
@@ -595,7 +646,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.modexp(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.modexp(call_, a_, b_, m_);
+
+        // console.log("modexp gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function modexpAlloc(
@@ -644,6 +700,67 @@ contract U512Mock {
         return to_.toBytes();
     }
 
+    function modexpU256(
+        bytes memory aBytes_,
+        uint256 b_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.modexpU256(call_, a_, b_, m_);
+
+        // console.log("modexpU256 gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
+    }
+
+    function modexpU256Alloc(
+        bytes memory aBytes_,
+        uint256 b_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        return U512.modexpU256(a_, b_, m_).toBytes();
+    }
+
+    function modexpU256Assign(
+        bytes memory aBytes_,
+        uint256 b_,
+        bytes memory mBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+
+        U512.modexpU256Assign(call_, a_, b_, m_);
+
+        return a_.toBytes();
+    }
+
+    function modexpU256AssignTo(
+        bytes memory aBytes_,
+        uint256 b_,
+        bytes memory mBytes_,
+        bytes memory toBytes_
+    ) external view returns (bytes memory rBytes_) {
+        call call_ = U512.initCall();
+
+        uint512 a_ = U512.fromBytes(aBytes_);
+        uint512 m_ = U512.fromBytes(mBytes_);
+        uint512 to_ = U512.fromBytes(toBytes_);
+
+        U512.modexpU256AssignTo(call_, a_, b_, m_, to_);
+
+        return to_.toBytes();
+    }
+
     function moddiv(
         bytes memory aBytes_,
         bytes memory bBytes_,
@@ -655,7 +772,12 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
 
-        return U512.moddiv(call_, a_, b_, m_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.moddiv(call_, a_, b_, m_);
+
+        // console.log("moddiv gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function moddivAlloc(
@@ -721,9 +843,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
 
-        U512.andAssign(a_, b_);
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.and(a_, b_);
 
-        return a_.toBytes();
+        // console.log("and gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function andAssignTo(
@@ -757,9 +882,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
 
-        U512.orAssign(a_, b_);
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.or(a_, b_);
 
-        return a_.toBytes();
+        // console.log("or gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function orAssignTo(
@@ -783,7 +911,12 @@ contract U512Mock {
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
 
-        return U512.xor(a_, b_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.xor(a_, b_);
+
+        // console.log("xor gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function xorAssign(
@@ -815,7 +948,12 @@ contract U512Mock {
     function not(bytes memory aBytes_) external view returns (bytes memory rBytes_) {
         uint512 a_ = U512.fromBytes(aBytes_);
 
-        return U512.not(a_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.not(a_);
+
+        // console.log("not gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function notAssign(bytes memory aBytes_) external view returns (bytes memory rBytes_) {
@@ -841,7 +979,12 @@ contract U512Mock {
     function shl(bytes memory aBytes_, uint8 b_) external view returns (bytes memory rBytes_) {
         uint512 a_ = U512.fromBytes(aBytes_);
 
-        return U512.shl(a_, b_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.shl(a_, b_);
+
+        // console.log("shl gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function shlAssign(
@@ -871,7 +1014,12 @@ contract U512Mock {
     function shr(bytes memory aBytes_, uint8 b_) external view returns (bytes memory rBytes_) {
         uint512 a_ = U512.fromBytes(aBytes_);
 
-        return U512.shr(a_, b_).toBytes();
+        // uint256 gasBefore_ = gasleft();
+        uint512 result_ = U512.shr(a_, b_);
+
+        // console.log("shr gas: ", gasBefore_ - gasleft());
+
+        return result_.toBytes();
     }
 
     function shrAssign(
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index 4ce3ac43..379adb8e 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe.only("U512", () => {
+describe("U512", () => {
   const reverter = new Reverter();
 
   const prime = 76884956397045344220809746629001649092737531784414529538755519063063536359079n;
@@ -349,6 +349,20 @@ describe.only("U512", () => {
     }
   });
 
+  it("modexpU256 test", async () => {
+    for (let i = 0; i < 100; ++i) {
+      const a = randomU512();
+      const b = 100n;
+      const m = randomU512();
+      const to = randomU512();
+
+      expect(await u512.modexpU256(a, b, m)).to.equal(modexp(a, toBytes(b), m));
+      expect(await u512.modexpU256Alloc(a, b, m)).to.equal(modexp(a, toBytes(b), m));
+      expect(await u512.modexpU256Assign(a, b, m)).to.equal(modexp(a, toBytes(b), m));
+      expect(await u512.modexpU256AssignTo(a, b, m, to)).to.equal(modexp(a, toBytes(b), m));
+    }
+  });
+
   it("moddiv test", async () => {
     const m = toBytes(prime);
 
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index fb0cf5f1..7ea61fca 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -162,7 +162,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe("brainpoolP384r1", () => {
+  describe.only("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =
diff --git a/test/libs/crypto/ECDSA512.test.ts b/test/libs/crypto/ECDSA512.test.ts
index 240a596c..5b235a5d 100644
--- a/test/libs/crypto/ECDSA512.test.ts
+++ b/test/libs/crypto/ECDSA512.test.ts
@@ -19,7 +19,7 @@ describe("ECDSA512", () => {
 
   afterEach(reverter.revert);
 
-  describe("brainpoolP512r1", () => {
+  describe.only("brainpoolP512r1", () => {
     const signature =
       "0x0bd2593447cc6c02caf99d60418dd42e9a194c910e6755ed0c7059acac656b04ccfe1e8348462ee43066823aee2fed7ca012e9890dfb69866d7ae88b6506f9c744b42304e693796618d090dbcb2a2551c3cb78534611e61fd9d1a5c0938b5b8ec6ed53d2d28999eabbd8e7792d167fcf582492403a6a0f7cc94c73a28fb76b71";
     const pubKey =

From 0aaa9fef6079769f878fa043a2dfca2ef44911a4 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Sun, 26 Jan 2025 17:11:25 +0200
Subject: [PATCH 32/42] typo

---
 contracts/libs/bn/U512.sol | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index a056c670..10c41bd7 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -6,7 +6,7 @@ type call is uint256;
 
 /**
  * @notice Low-level library that implements unsigned 512-bit arithmetics.
-
+ *
  * | Statistic   | Avg          |
  * | ----------- | ------------ |
  * | add         | 269 gas      |

From 52672edbeb4400631d060156ed3ac0607330b2d1 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Sun, 26 Jan 2025 18:02:18 +0200
Subject: [PATCH 33/42] small adjustments

---
 contracts/libs/bn/U512.sol          |  2 +-
 contracts/libs/crypto/ECDSA384.sol  | 19 +++++++------------
 contracts/libs/crypto/ECDSA512.sol  | 19 +++++++------------
 contracts/mock/libs/bn/U512Mock.sol |  4 ++--
 test/libs/bn/U512.test.ts           | 10 +++++-----
 5 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 10c41bd7..516a6a0a 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -180,7 +180,7 @@ library U512 {
      * @param u256_ The 256-bit unsigned integer.
      * @return eq_ True if the integers are equal, false otherwise.
      */
-    function eqUint256(uint512 a_, uint256 u256_) internal pure returns (bool eq_) {
+    function eqU256(uint512 a_, uint256 u256_) internal pure returns (bool eq_) {
         unchecked {
             assembly {
                 eq_ := and(eq(mload(a_), 0), eq(mload(add(a_, 0x20)), u256_))
diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index b444392e..ef7d091f 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -9,7 +9,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 384-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~13.86 million gas per call.
+ * this is the most efficient implementation out there, consuming ~8.9 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
  */
@@ -82,9 +82,9 @@ library ECDSA384 {
 
             /// accept s only from the lower part of the curve
             if (
-                U512.eqUint256(inputs_.r, 0) ||
+                U512.eqU256(inputs_.r, 0) ||
                 U512.cmp(inputs_.r, params_.n) >= 0 ||
-                U512.eqUint256(inputs_.s, 0) ||
+                U512.eqU256(inputs_.s, 0) ||
                 U512.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
@@ -146,23 +146,18 @@ library ECDSA384 {
         uint512 y_
     ) private view returns (bool) {
         unchecked {
-            if (
-                U512.eqUint256(x_, 0) ||
-                U512.eq(x_, p_) ||
-                U512.eqUint256(y_, 0) ||
-                U512.eq(y_, p_)
-            ) {
+            if (U512.eqU256(x_, 0) || U512.eq(x_, p_) || U512.eqU256(y_, 0) || U512.eq(y_, p_)) {
                 return false;
             }
 
             uint512 lhs_ = U512.modexpU256(call_, y_, 2, p_);
             uint512 rhs_ = U512.modexpU256(call_, x_, 3, p_);
 
-            if (!U512.eqUint256(a_, 0)) {
+            if (!U512.eqU256(a_, 0)) {
                 rhs_ = U512.redadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
             }
 
-            if (!U512.eqUint256(b_, 0)) {
+            if (!U512.eqU256(b_, 0)) {
                 rhs_ = U512.redadd(call_, rhs_, b_, p_); // x^3 + a*x + b
             }
 
@@ -278,7 +273,7 @@ library ECDSA384 {
                 return (x2_, y2_);
             }
 
-            if (U512.eqUint256(y1_, 0)) {
+            if (U512.eqU256(y1_, 0)) {
                 return (x2_, y2_);
             }
 
diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index 75ea4fc6..ff70140c 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -9,7 +9,7 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol";
  * @notice Cryptography module
  *
  * This library provides functionality for ECDSA verification over any 512-bit curve. Currently,
- * this is the most efficient implementation out there, consuming ~20.1 million gas per call.
+ * this is the most efficient implementation out there, consuming ~13.6 million gas per call.
  *
  * The approach is Strauss-Shamir double scalar multiplication with 6 bits of precompute + affine coordinates.
  */
@@ -82,9 +82,9 @@ library ECDSA512 {
 
             /// accept s only from the lower part of the curve
             if (
-                U512.eqUint256(inputs_.r, 0) ||
+                U512.eqU256(inputs_.r, 0) ||
                 U512.cmp(inputs_.r, params_.n) >= 0 ||
-                U512.eqUint256(inputs_.s, 0) ||
+                U512.eqU256(inputs_.s, 0) ||
                 U512.cmp(inputs_.s, params_.lowSmax) > 0
             ) {
                 return false;
@@ -146,23 +146,18 @@ library ECDSA512 {
         uint512 y_
     ) private view returns (bool) {
         unchecked {
-            if (
-                U512.eqUint256(x_, 0) ||
-                U512.eq(x_, p_) ||
-                U512.eqUint256(y_, 0) ||
-                U512.eq(y_, p_)
-            ) {
+            if (U512.eqU256(x_, 0) || U512.eq(x_, p_) || U512.eqU256(y_, 0) || U512.eq(y_, p_)) {
                 return false;
             }
 
             uint512 lhs_ = U512.modexpU256(call_, y_, 2, p_);
             uint512 rhs_ = U512.modexpU256(call_, x_, 3, p_);
 
-            if (!U512.eqUint256(a_, 0)) {
+            if (!U512.eqU256(a_, 0)) {
                 rhs_ = U512.redadd(call_, rhs_, U512.modmul(call_, x_, a_, p_), p_); // x^3 + a*x
             }
 
-            if (!U512.eqUint256(b_, 0)) {
+            if (!U512.eqU256(b_, 0)) {
                 rhs_ = U512.redadd(call_, rhs_, b_, p_); // x^3 + a*x + b
             }
 
@@ -286,7 +281,7 @@ library ECDSA512 {
                 return (x2_, y2_);
             }
 
-            if (U512.eqUint256(y1_, 0)) {
+            if (U512.eqU256(y1_, 0)) {
                 return (x2_, y2_);
             }
 
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 45652635..3922c88f 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -60,10 +60,10 @@ contract U512Mock {
         return U512.eq(a_, b_);
     }
 
-    function eqUint256(bytes memory aBytes_, uint256 u256_) external view returns (bool eq_) {
+    function eqU256(bytes memory aBytes_, uint256 u256_) external view returns (bool eq_) {
         uint512 a_ = U512.fromBytes(aBytes_);
 
-        return U512.eqUint256(a_, u256_);
+        return U512.eqU256(a_, u256_);
     }
 
     function cmp(bytes memory aBytes_, bytes memory bBytes_) external view returns (int256) {
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index 379adb8e..932603b3 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe("U512", () => {
+describe.only("U512", () => {
   const reverter = new Reverter();
 
   const prime = 76884956397045344220809746629001649092737531784414529538755519063063536359079n;
@@ -183,10 +183,10 @@ describe("U512", () => {
     expect(await u512.eq("0x00", "0x00")).to.be.true;
   });
 
-  it("eqUint256 test", async () => {
-    expect(await u512.eqUint256(toBytes(1020n), 1002n)).to.be.false;
-    expect(await u512.eqUint256(toBytes(200n), 200n)).to.be.true;
-    expect(await u512.eqUint256("0x00", 0)).to.be.true;
+  it("eqU256 test", async () => {
+    expect(await u512.eqU256(toBytes(1020n), 1002n)).to.be.false;
+    expect(await u512.eqU256(toBytes(200n), 200n)).to.be.true;
+    expect(await u512.eqU256("0x00", 0)).to.be.true;
   });
 
   it("cmp test", async () => {

From d87784e314991ebe650701b4fec8e143ee270cfc Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Mon, 27 Jan 2025 14:19:52 +0200
Subject: [PATCH 34/42] add U512 usage example and fix tests

---
 .solcover.ts                        |  2 +-
 contracts/libs/bn/U512.sol          | 24 ++++++++++++++++++++++++
 contracts/mock/libs/bn/U512Mock.sol |  8 ++++----
 hardhat.config.ts                   |  2 +-
 package.json                        |  4 ++--
 test/libs/bn/U512.test.ts           |  2 +-
 test/libs/crypto/ECDSA384.test.ts   |  4 ++--
 test/libs/crypto/ECDSA512.test.ts   |  4 ++--
 8 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/.solcover.ts b/.solcover.ts
index 516fb92c..bf2a164d 100644
--- a/.solcover.ts
+++ b/.solcover.ts
@@ -1,4 +1,4 @@
 module.exports = {
-  skipFiles: ["interfaces/", "mock/"],
+  skipFiles: ["interfaces/", "mock/", "libs/crypto/ECDSA512.sol", "libs/crypto/ECDSA384.sol"],
   configureYulOptimizer: true,
 };
diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 516a6a0a..83e4f0a2 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -28,6 +28,30 @@ type call is uint256;
  * | not         | 216 gas      |
  * | shl         | 272 gas      |
  * | shr         | 272 gas      |
+ *
+ * ## Usage example:
+ *
+ * ```
+ * using U512 for uint512;
+ *
+ * uint512 u512 = U512.fromUint256(12345678901234567890);
+ * uint512 modulus = U512.fromUint256(987654321987654321);
+ *
+ * call callPointer = U512.initCall();
+ *
+ * // Modular arithmetic with a call (no memory reallocation for each operation)
+ * uint512 result = U512.mod(callPointer, u512, modulus);
+ * U512.modaddAssign(callPointer, u512, modulus);
+ *
+ * // Modular arithmetic without a call (memory will be allocated for each operation)
+ * result = u512.mod(modulus);
+ * u512.modaddAssign(modulus);
+ *
+ * u512.subAssign(result);
+ *
+ * uint512 u512Copy = u512.copy();
+ * bool isEqual = u512.isEq(u512Copy);
+ * ```
  */
 library U512 {
     uint256 private constant _UINT512_ALLOCATION = 64;
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index 3922c88f..f10aa348 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -844,11 +844,11 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
 
         // uint256 gasBefore_ = gasleft();
-        uint512 result_ = U512.and(a_, b_);
+        U512.andAssign(a_, b_);
 
         // console.log("and gas: ", gasBefore_ - gasleft());
 
-        return result_.toBytes();
+        return a_.toBytes();
     }
 
     function andAssignTo(
@@ -883,11 +883,11 @@ contract U512Mock {
         uint512 b_ = U512.fromBytes(bBytes_);
 
         // uint256 gasBefore_ = gasleft();
-        uint512 result_ = U512.or(a_, b_);
+        U512.orAssign(a_, b_);
 
         // console.log("or gas: ", gasBefore_ - gasleft());
 
-        return result_.toBytes();
+        return a_.toBytes();
     }
 
     function orAssignTo(
diff --git a/hardhat.config.ts b/hardhat.config.ts
index 71d85f0d..7bed7690 100644
--- a/hardhat.config.ts
+++ b/hardhat.config.ts
@@ -61,7 +61,7 @@ const config: HardhatUserConfig = {
   gasReporter: {
     currency: "USD",
     gasPrice: 50,
-    enabled: true,
+    enabled: false,
     reportPureAndViewMethods: true,
     coinmarketcap: `${process.env.COINMARKETCAP_KEY}`,
   },
diff --git a/package.json b/package.json
index ac907dd0..4317577f 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@solarity/solidity-lib",
-  "version": "2.7.15",
+  "version": "2.7.16",
   "license": "MIT",
   "author": "Distributed Lab",
   "readme": "README.md",
@@ -23,7 +23,7 @@
   "scripts": {
     "prepare": "husky",
     "compile": "npx hardhat compile --force",
-    "coverage": "NODE_OPTIONS='--max-old-space-size=8192' npx hardhat coverage --solcoverjs ./.solcover.ts",
+    "coverage": "npx hardhat coverage --solcoverjs ./.solcover.ts",
     "test": "npx hardhat test",
     "private-network": "npx hardhat node",
     "lint-fix": "npm run lint-sol-fix && npm run lint-ts-fix && npm run lint-json-fix",
diff --git a/test/libs/bn/U512.test.ts b/test/libs/bn/U512.test.ts
index 932603b3..21788ebb 100644
--- a/test/libs/bn/U512.test.ts
+++ b/test/libs/bn/U512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { U512Mock } from "@ethers-v6";
 
-describe.only("U512", () => {
+describe("U512", () => {
   const reverter = new Reverter();
 
   const prime = 76884956397045344220809746629001649092737531784414529538755519063063536359079n;
diff --git a/test/libs/crypto/ECDSA384.test.ts b/test/libs/crypto/ECDSA384.test.ts
index 7ea61fca..e587af29 100644
--- a/test/libs/crypto/ECDSA384.test.ts
+++ b/test/libs/crypto/ECDSA384.test.ts
@@ -30,7 +30,7 @@ function modifyRight(value: string, modifier: string): string {
   return newSignature;
 }
 
-describe("ECDSA384", () => {
+describe.skip("ECDSA384", () => {
   const reverter = new Reverter();
 
   let ecdsa384: ECDSA384Mock;
@@ -162,7 +162,7 @@ describe("ECDSA384", () => {
     });
   });
 
-  describe.only("brainpoolP384r1", () => {
+  describe("brainpoolP384r1", () => {
     const signature =
       "0x42d803dcea3f9809cda4ce5a541d969dbeacd6ab7bef7788db1e4a00dac3ae87c1c241c24bb39e041725e607718fc322306b08967b56e4e49d7c9afc48833f580ac9b49cdcec0962d564f89a8f0b57a9742573ebcbe709869253e8b466cb33be";
     const pubKey =
diff --git a/test/libs/crypto/ECDSA512.test.ts b/test/libs/crypto/ECDSA512.test.ts
index 5b235a5d..92a659bf 100644
--- a/test/libs/crypto/ECDSA512.test.ts
+++ b/test/libs/crypto/ECDSA512.test.ts
@@ -4,7 +4,7 @@ import { Reverter } from "@/test/helpers/reverter";
 
 import { ECDSA512Mock } from "@ethers-v6";
 
-describe("ECDSA512", () => {
+describe.skip("ECDSA512", () => {
   const reverter = new Reverter();
 
   let ecdsa512: ECDSA512Mock;
@@ -19,7 +19,7 @@ describe("ECDSA512", () => {
 
   afterEach(reverter.revert);
 
-  describe.only("brainpoolP512r1", () => {
+  describe("brainpoolP512r1", () => {
     const signature =
       "0x0bd2593447cc6c02caf99d60418dd42e9a194c910e6755ed0c7059acac656b04ccfe1e8348462ee43066823aee2fed7ca012e9890dfb69866d7ae88b6506f9c744b42304e693796618d090dbcb2a2551c3cb78534611e61fd9d1a5c0938b5b8ec6ed53d2d28999eabbd8e7792d167fcf582492403a6a0f7cc94c73a28fb76b71";
     const pubKey =

From bbca148d75fe93e986ba46069f3da7adc0665b20 Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Mon, 27 Jan 2025 14:40:48 +0200
Subject: [PATCH 35/42] fix natspec

---
 contracts/libs/bn/U512.sol | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 83e4f0a2..ae50f058 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -50,7 +50,13 @@ type call is uint256;
  * u512.subAssign(result);
  *
  * uint512 u512Copy = u512.copy();
- * bool isEqual = u512.isEq(u512Copy);
+ * bool isEqual = u512.eq(u512Copy);
+ *
+ * uint512 a = U512.fromUint256(3);
+ * uint512 b = U512.fromUint256(6);
+ * uint512 m = U512.fromUint256(5);
+ * uint512 r = a.modadd(b, m);
+ * r.toBytes(); // "0x0...04"
  * ```
  */
 library U512 {

From 4b65a75302ae9051577b617ec707534e039f889f Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 27 Jan 2025 15:01:39 +0200
Subject: [PATCH 36/42] fixed comment

---
 contracts/libs/bn/U512.sol | 39 +++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index ae50f058..442eeffb 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -34,30 +34,31 @@ type call is uint256;
  * ```
  * using U512 for uint512;
  *
- * uint512 u512 = U512.fromUint256(12345678901234567890);
- * uint512 modulus = U512.fromUint256(987654321987654321);
+ * uint512 a_ = U512.fromUint256(3);
+ * uint512 b_ = U512.fromUint256(6);
+ * uint512 m_ = U512.fromUint256(5);
+ * uint512 r_ = a.modadd(b_, m_);
  *
- * call callPointer = U512.initCall();
- *
- * // Modular arithmetic with a call (no memory reallocation for each operation)
- * uint512 result = U512.mod(callPointer, u512, modulus);
- * U512.modaddAssign(callPointer, u512, modulus);
- *
- * // Modular arithmetic without a call (memory will be allocated for each operation)
- * result = u512.mod(modulus);
- * u512.modaddAssign(modulus);
+ * r_.eq(U512.fromUint256(4)); // true
+ * ```
  *
- * u512.subAssign(result);
+ * Note that each mod call allocates extra memory for invoking the precompile. This is fine for lightweight
+ * functions (under 1 million gas). However, for heavier functions, consider allocating memory once and reusing
+ * it in subsequent calls. This approach can help reduce gas costs. Additionally, use assignment functions to avoid
+ * allocating memory for new local variables, instead assigning values to existing ones.
  *
- * uint512 u512Copy = u512.copy();
- * bool isEqual = u512.eq(u512Copy);
+ * ```
+ * using U512 for uint512;
  *
- * uint512 a = U512.fromUint256(3);
- * uint512 b = U512.fromUint256(6);
- * uint512 m = U512.fromUint256(5);
- * uint512 r = a.modadd(b, m);
- * r.toBytes(); // "0x0...04"
+ * call call_ = U512.initCall();
+ * uint512 a_ = U512.fromUint256(3);
+ * uint512 b_ = U512.fromUint256(6);
+ * uint512 m_ = U512.fromUint256(5);
+ * uint512 r_ = a.modadd(call_, b_, m_); // 4
+ * r_.mulmodAssignTo(a_, m_); // 2
+ * r_.eq(U512.fromUint256(2)); // true
  * ```
+ *
  */
 library U512 {
     uint256 private constant _UINT512_ALLOCATION = 64;

From bf130bea81bead02fb92731efd806dce56cba7fc Mon Sep 17 00:00:00 2001
From: mllwchrry <mariia.zhvanko@gmail.com>
Date: Mon, 27 Jan 2025 15:21:32 +0200
Subject: [PATCH 37/42] add toBytes to natspec

---
 contracts/libs/bn/U512.sol | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 442eeffb..5c49ae72 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -55,8 +55,9 @@ type call is uint256;
  * uint512 b_ = U512.fromUint256(6);
  * uint512 m_ = U512.fromUint256(5);
  * uint512 r_ = a.modadd(call_, b_, m_); // 4
- * r_.mulmodAssignTo(a_, m_); // 2
+ * r_.modmulAssign(a_, m_); // 2
  * r_.eq(U512.fromUint256(2)); // true
+ * r_.toBytes(); // "0x0...02"
  * ```
  *
  */

From 66ba3e130b64a7fae90d89429e296de2d6266473 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 27 Jan 2025 17:51:55 +0200
Subject: [PATCH 38/42] typos

---
 contracts/libs/bn/U512.sol | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 5c49ae72..af73a7b6 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -38,7 +38,6 @@ type call is uint256;
  * uint512 b_ = U512.fromUint256(6);
  * uint512 m_ = U512.fromUint256(5);
  * uint512 r_ = a.modadd(b_, m_);
- *
  * r_.eq(U512.fromUint256(4)); // true
  * ```
  *
@@ -57,7 +56,7 @@ type call is uint256;
  * uint512 r_ = a.modadd(call_, b_, m_); // 4
  * r_.modmulAssign(a_, m_); // 2
  * r_.eq(U512.fromUint256(2)); // true
- * r_.toBytes(); // "0x0...02"
+ * r_.toBytes(); // "0x00..02"
  * ```
  *
  */

From d5a813c5b309154d6574af9da1acb3b428c0cfeb Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 27 Jan 2025 17:52:44 +0200
Subject: [PATCH 39/42] typos

---
 contracts/libs/bn/U512.sol | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index af73a7b6..7d6e228e 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -42,8 +42,8 @@ type call is uint256;
  * ```
  *
  * Note that each mod call allocates extra memory for invoking the precompile. This is fine for lightweight
- * functions (under 1 million gas). However, for heavier functions, consider allocating memory once and reusing
- * it in subsequent calls. This approach can help reduce gas costs. Additionally, use assignment functions to avoid
+ * functions. However, for heavy functions, consider allocating memory once and reusing it in subsequent calls.
+ * This approach can help reduce gas costs. Additionally, use assignment functions to avoid
  * allocating memory for new local variables, instead assigning values to existing ones.
  *
  * ```

From 0a427dc40a526b9cd91a3c5bfcb27185839c0444 Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Mon, 27 Jan 2025 17:53:51 +0200
Subject: [PATCH 40/42] typos

---
 contracts/libs/bn/U512.sol | 1 -
 1 file changed, 1 deletion(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 7d6e228e..1299d661 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -58,7 +58,6 @@ type call is uint256;
  * r_.eq(U512.fromUint256(2)); // true
  * r_.toBytes(); // "0x00..02"
  * ```
- *
  */
 library U512 {
     uint256 private constant _UINT512_ALLOCATION = 64;

From 8dd88d9710cfdc2f564c8b6d79cbfd5f7be94f2d Mon Sep 17 00:00:00 2001
From: dovgopoly <idovgopoly@gmail.com>
Date: Tue, 28 Jan 2025 13:07:16 +0200
Subject: [PATCH 41/42] small adjustments

---
 contracts/libs/bn/U512.sol          | 126 +++++++++++++++-------------
 contracts/libs/crypto/ECDSA384.sol  |  14 ++--
 contracts/libs/crypto/ECDSA512.sol  |  14 ++--
 contracts/mock/libs/bn/U512Mock.sol |  62 +++++++-------
 4 files changed, 115 insertions(+), 101 deletions(-)

diff --git a/contracts/libs/bn/U512.sol b/contracts/libs/bn/U512.sol
index 1299d661..b6689334 100644
--- a/contracts/libs/bn/U512.sol
+++ b/contracts/libs/bn/U512.sol
@@ -2,7 +2,7 @@
 pragma solidity ^0.8.4;
 
 type uint512 is uint256;
-type call is uint256;
+type call512 is uint256;
 
 /**
  * @notice Low-level library that implements unsigned 512-bit arithmetics.
@@ -29,6 +29,14 @@ type call is uint256;
  * | shl         | 272 gas      |
  * | shr         | 272 gas      |
  *
+ * ## Imports:
+ *
+ * First import the library and all the necessary types.
+ *
+ * ```
+ * import {U512, uint512, call512} from "U512.sol";
+ * ```
+ *
  * ## Usage example:
  *
  * ```
@@ -49,7 +57,7 @@ type call is uint256;
  * ```
  * using U512 for uint512;
  *
- * call call_ = U512.initCall();
+ * call512 call_ = U512.initCall();
  * uint512 a_ = U512.fromUint256(3);
  * uint512 b_ = U512.fromUint256(6);
  * uint512 m_ = U512.fromUint256(5);
@@ -68,9 +76,9 @@ library U512 {
      * @notice Initializes a memory pointer for precompile call arguments.
      * @return call_ A memory pointer for precompile operations.
      */
-    function initCall() internal pure returns (call call_) {
+    function initCall() internal pure returns (call512 call_) {
         unchecked {
-            call_ = call.wrap(_allocate(_CALL_ALLOCATION));
+            call_ = call512.wrap(_allocate(_CALL_ALLOCATION));
 
             assembly {
                 call_ := add(call_, 0x40)
@@ -266,7 +274,7 @@ library U512 {
      * @param m_ The modulus.
      * @return r_ The result of the modular operation `(a_ % m_)`.
      */
-    function mod(call call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
+    function mod(call512 call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
@@ -284,7 +292,7 @@ library U512 {
     function mod(uint512 a_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _mod(call_, a_, m_, r_);
         }
@@ -297,7 +305,7 @@ library U512 {
      * @param a_ The dividend.
      * @param m_ The modulus.
      */
-    function modAssign(call call_, uint512 a_, uint512 m_) internal view {
+    function modAssign(call512 call_, uint512 a_, uint512 m_) internal view {
         unchecked {
             _mod(call_, a_, m_, a_);
         }
@@ -311,7 +319,7 @@ library U512 {
      * @param m_ The modulus.
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
-    function modAssignTo(call call_, uint512 a_, uint512 m_, uint512 to_) internal view {
+    function modAssignTo(call512 call_, uint512 a_, uint512 m_, uint512 to_) internal view {
         unchecked {
             _mod(call_, a_, m_, to_);
         }
@@ -325,7 +333,7 @@ library U512 {
      * @param m_ The modulus.
      * @return r_ The modular inverse result `a_^(-1) % m_`.
      */
-    function modinv(call call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
+    function modinv(call512 call_, uint512 a_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
@@ -344,7 +352,7 @@ library U512 {
     function modinv(uint512 a_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _modinv(call_, a_, m_, r_);
         }
@@ -358,7 +366,7 @@ library U512 {
      * @param a_ The 512-bit unsigned integer to invert.
      * @param m_ The modulus.
      */
-    function modinvAssign(call call_, uint512 a_, uint512 m_) internal view {
+    function modinvAssign(call512 call_, uint512 a_, uint512 m_) internal view {
         unchecked {
             _modinv(call_, a_, m_, a_);
         }
@@ -373,7 +381,7 @@ library U512 {
      * @param m_ The modulus.
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
-    function modinvAssignTo(call call_, uint512 a_, uint512 m_, uint512 to_) internal view {
+    function modinvAssignTo(call512 call_, uint512 a_, uint512 m_, uint512 to_) internal view {
         unchecked {
             _modinv(call_, a_, m_, to_);
         }
@@ -388,7 +396,7 @@ library U512 {
      * @return r_ The result of modular exponentiation `(b_^e_) % m_`.
      */
     function modexp(
-        call call_,
+        call512 call_,
         uint512 b_,
         uint512 e_,
         uint512 m_
@@ -411,7 +419,7 @@ library U512 {
     function modexp(uint512 b_, uint512 e_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _modexp(call_, b_, e_, m_, r_);
         }
@@ -425,7 +433,7 @@ library U512 {
      * @param e_ The exponent.
      * @param m_ The modulus.
      */
-    function modexpAssign(call call_, uint512 b_, uint512 e_, uint512 m_) internal view {
+    function modexpAssign(call512 call_, uint512 b_, uint512 e_, uint512 m_) internal view {
         unchecked {
             _modexp(call_, b_, e_, m_, b_);
         }
@@ -441,7 +449,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function modexpAssignTo(
-        call call_,
+        call512 call_,
         uint512 b_,
         uint512 e_,
         uint512 m_,
@@ -461,7 +469,7 @@ library U512 {
      * @return r_ The result of modular exponentiation `(b_^e_) % m_`.
      */
     function modexpU256(
-        call call_,
+        call512 call_,
         uint512 b_,
         uint256 e_,
         uint512 m_
@@ -484,7 +492,7 @@ library U512 {
     function modexpU256(uint512 b_, uint256 e_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _modexpU256(call_, b_, e_, m_, r_);
         }
@@ -498,7 +506,7 @@ library U512 {
      * @param e_ The exponent.
      * @param m_ The modulus.
      */
-    function modexpU256Assign(call call_, uint512 b_, uint256 e_, uint512 m_) internal view {
+    function modexpU256Assign(call512 call_, uint512 b_, uint256 e_, uint512 m_) internal view {
         unchecked {
             _modexpU256(call_, b_, e_, m_, b_);
         }
@@ -514,7 +522,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function modexpU256AssignTo(
-        call call_,
+        call512 call_,
         uint512 b_,
         uint256 e_,
         uint512 m_,
@@ -534,7 +542,7 @@ library U512 {
      * @return r_ The result of the modular addition `(a_ + b_) % m_`.
      */
     function modadd(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_
@@ -557,7 +565,7 @@ library U512 {
     function modadd(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _modadd(call_, a_, b_, m_, r_);
         }
@@ -571,7 +579,7 @@ library U512 {
      * @param b_ The second addend.
      * @param m_ The modulus.
      */
-    function modaddAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
+    function modaddAssign(call512 call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _modadd(call_, a_, b_, m_, a_);
         }
@@ -587,7 +595,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function modaddAssignTo(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_,
@@ -647,7 +655,7 @@ library U512 {
      * @return r_ The result of the modular addition `(a_ + b_) % m_`.
      */
     function redadd(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_
@@ -673,7 +681,7 @@ library U512 {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
             // `redadd` doesn't make calls, it only requires 2 words for buffer.
-            call call_ = call.wrap(_allocate(_UINT512_ALLOCATION));
+            call512 call_ = call512.wrap(_allocate(_UINT512_ALLOCATION));
 
             _redadd(call_, a_, b_, m_, r_);
         }
@@ -687,7 +695,7 @@ library U512 {
      * @param b_ The second addend, reduced by `m_`.
      * @param m_ The modulus.
      */
-    function redaddAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
+    function redaddAssign(call512 call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
         unchecked {
             _redadd(call_, a_, b_, m_, a_);
         }
@@ -703,7 +711,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function redaddAssignTo(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_,
@@ -723,7 +731,7 @@ library U512 {
      * @return r_ The result of the modular subtraction `(a_ - b_) % m_`.
      */
     function modsub(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_
@@ -746,7 +754,7 @@ library U512 {
     function modsub(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _modsub(call_, a_, b_, m_, r_);
         }
@@ -759,7 +767,7 @@ library U512 {
      * @param b_ The subtrahend.
      * @param m_ The modulus.
      */
-    function modsubAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
+    function modsubAssign(call512 call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _modsub(call_, a_, b_, m_, a_);
         }
@@ -774,7 +782,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function modsubAssignTo(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_,
@@ -834,7 +842,7 @@ library U512 {
      * @return r_ The result of the modular subtraction `(a_ - b_) % m_`.
      */
     function redsub(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_
@@ -860,7 +868,7 @@ library U512 {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
 
             // `redsub` doesn't make calls, it only requires 2 words for buffer.
-            call call_ = call.wrap(_allocate(_UINT512_ALLOCATION));
+            call512 call_ = call512.wrap(_allocate(_UINT512_ALLOCATION));
 
             _redsub(call_, a_, b_, m_, r_);
         }
@@ -874,7 +882,7 @@ library U512 {
      * @param b_ The subtrahend, reduced by `m_`.
      * @param m_ The modulus.
      */
-    function redsubAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
+    function redsubAssign(call512 call_, uint512 a_, uint512 b_, uint512 m_) internal pure {
         unchecked {
             _redsub(call_, a_, b_, m_, a_);
         }
@@ -890,7 +898,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function redsubAssignTo(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_,
@@ -910,7 +918,7 @@ library U512 {
      * @return r_ The result of the modular multiplication `(a_ * b_) % m_`.
      */
     function modmul(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_
@@ -933,7 +941,7 @@ library U512 {
     function modmul(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _modmul(call_, a_, b_, m_, r_);
         }
@@ -947,7 +955,7 @@ library U512 {
      * @param b_ The second factor.
      * @param m_ The modulus.
      */
-    function modmulAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
+    function modmulAssign(call512 call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _modmul(call_, a_, b_, m_, a_);
         }
@@ -963,7 +971,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function modmulAssignTo(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_,
@@ -1024,7 +1032,7 @@ library U512 {
      * @return r_ The result of the modular division.
      */
     function moddiv(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_
@@ -1049,7 +1057,7 @@ library U512 {
     function moddiv(uint512 a_, uint512 b_, uint512 m_) internal view returns (uint512 r_) {
         unchecked {
             r_ = uint512.wrap(_allocate(_UINT512_ALLOCATION));
-            call call_ = initCall();
+            call512 call_ = initCall();
 
             _moddiv(call_, a_, b_, m_, r_);
         }
@@ -1064,7 +1072,7 @@ library U512 {
      * @param b_ The divisor.
      * @param m_ The modulus.
      */
-    function moddivAssign(call call_, uint512 a_, uint512 b_, uint512 m_) internal view {
+    function moddivAssign(call512 call_, uint512 a_, uint512 b_, uint512 m_) internal view {
         unchecked {
             _moddiv(call_, a_, b_, m_, a_);
         }
@@ -1081,7 +1089,7 @@ library U512 {
      * @param to_ The target 512-bit unsigned integer to store the result.
      */
     function moddivAssignTo(
-        call call_,
+        call512 call_,
         uint512 a_,
         uint512 b_,
         uint512 m_,
@@ -1327,7 +1335,7 @@ library U512 {
      * @notice Performs modular arithmetic using the EVM precompiled contract.
      * @dev Computes `(a_ % m_)` and stores the result in `r_`.
      */
-    function _mod(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
+    function _mod(call512 call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
                 mstore(call_, 0x40)
@@ -1348,7 +1356,7 @@ library U512 {
      * @notice Performs modular exponentiation using the EVM precompiled contract.
      * @dev Computes `(a_^e_) % m_` and stores the result in `r_`.
      */
-    function _modexp(call call_, uint512 a_, uint512 e_, uint512 m_, uint512 r_) private view {
+    function _modexp(call512 call_, uint512 a_, uint512 e_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
                 mstore(call_, 0x40)
@@ -1370,7 +1378,13 @@ library U512 {
      * @notice Performs modular exponentiation using the EVM precompiled contract.
      * @dev Computes `(a_^e_) % m_` and stores the result in `r_`.
      */
-    function _modexpU256(call call_, uint512 a_, uint256 e_, uint512 m_, uint512 r_) private view {
+    function _modexpU256(
+        call512 call_,
+        uint512 a_,
+        uint256 e_,
+        uint512 m_,
+        uint512 r_
+    ) private view {
         unchecked {
             assembly {
                 mstore(call_, 0x40)
@@ -1392,7 +1406,7 @@ library U512 {
      * @dev The modulus `m_` must be a prime number.
      * @dev Computes `a_^(-1) % m_` and stores the result in `r_`.
      */
-    function _modinv(call call_, uint512 a_, uint512 m_, uint512 r_) private view {
+    function _modinv(call512 call_, uint512 a_, uint512 m_, uint512 r_) private view {
         unchecked {
             uint512 buffer_ = _buffer(call_);
 
@@ -1443,7 +1457,7 @@ library U512 {
      * @notice Performs modular addition using the EVM precompiled contract.
      * @dev Computes `(a_ + b_) % m_` and stores the result in `r_`.
      */
-    function _modadd(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
+    function _modadd(call512 call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
             assembly {
                 let aWord_ := mload(add(a_, 0x20))
@@ -1473,7 +1487,7 @@ library U512 {
      * @notice Performs reduced modular addition of two 512-bit unsigned integers.
      * @dev Computes `(a_ + b_) % m_` assuming `a_` and `b_` are already reduced by `m_`.
      */
-    function _redadd(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
+    function _redadd(call512 call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
         unchecked {
             uint512 buffer_ = _buffer(call_);
             bool overflowed_;
@@ -1524,7 +1538,7 @@ library U512 {
      * @notice Performs modular subtraction using the EVM precompiled contract.
      * @dev Computes `(a_ - b_) % m_` and stores the result in `r_`.
      */
-    function _modsub(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
+    function _modsub(call512 call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
             int cmp_ = cmp(a_, b_);
 
@@ -1557,7 +1571,7 @@ library U512 {
      * @notice Performs reduced modular subtraction of two 512-bit unsigned integers.
      * @dev Computes `(a_ - b_) % m_` assuming `a_` and `b_` are already reduced by `m_`.
      */
-    function _redsub(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
+    function _redsub(call512 call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private pure {
         unchecked {
             if (cmp(a_, b_) >= 0) {
                 _sub(a_, b_, r_);
@@ -1606,7 +1620,7 @@ library U512 {
      * @dev Calculates partial products and stores them in `call_` for further processing.
      * @dev Generalizes the "muldiv" algorithm to split 512-bit unsigned integers into chunks, as detailed at https://xn--2-umb.com/21/muldiv/.
      */
-    function _modmul2p(call call_, uint512 a_, uint512 b_) private pure {
+    function _modmul2p(call512 call_, uint512 a_, uint512 b_) private pure {
         unchecked {
             assembly {
                 let a0_ := mload(a_)
@@ -1672,7 +1686,7 @@ library U512 {
      * @notice Performs modular multiplication using the EVM precompiled contract.
      * @dev Computes `(a_ * b_) % m_` and stores the result in `r_`.
      */
-    function _modmul(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
+    function _modmul(call512 call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) private view {
         unchecked {
             _modmul2p(call_, a_, b_);
 
@@ -1694,7 +1708,7 @@ library U512 {
      * @dev The modulus `m_` must be a prime number.
      * @dev Computes `(a_ * b_^(-1)) % m_` and stores the result in `r_`.
      */
-    function _moddiv(call call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) internal view {
+    function _moddiv(call512 call_, uint512 a_, uint512 b_, uint512 m_, uint512 r_) internal view {
         unchecked {
             uint512 buffer_ = _buffer(call_);
 
@@ -1798,7 +1812,7 @@ library U512 {
     /**
      * @notice Calculates a memory pointer for a buffer based on the provided `call_` pointer.
      */
-    function _buffer(call call_) private pure returns (uint512 buffer_) {
+    function _buffer(call512 call_) private pure returns (uint512 buffer_) {
         unchecked {
             assembly {
                 buffer_ := sub(call_, 0x40)
diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol
index ef7d091f..52230994 100644
--- a/contracts/libs/crypto/ECDSA384.sol
+++ b/contracts/libs/crypto/ECDSA384.sol
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {call, uint512} from "../bn/U512.sol";
+import {call512, uint512} from "../bn/U512.sol";
 import {U512} from "../bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
 
@@ -78,7 +78,7 @@ library ECDSA384 {
                 lowSmax: U512.fromBytes(curveParams_.lowSmax)
             });
 
-            call call_ = U512.initCall();
+            call512 call_ = U512.initCall();
 
             /// accept s only from the lower part of the curve
             if (
@@ -138,7 +138,7 @@ library ECDSA384 {
      * @dev Check if a point in affine coordinates is on the curve.
      */
     function _isOnCurve(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 a_,
         uint512 b_,
@@ -169,7 +169,7 @@ library ECDSA384 {
      * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
      */
     function _doubleScalarMultiplication(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
@@ -260,7 +260,7 @@ library ECDSA384 {
      * @dev Double an elliptic curve point in affine coordinates.
      */
     function _twiceAffine(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
@@ -298,7 +298,7 @@ library ECDSA384 {
      * @dev Add two elliptic curve points in affine coordinates.
      */
     function _addAffine(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
@@ -344,7 +344,7 @@ library ECDSA384 {
     }
 
     function _precomputePointsTable(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
diff --git a/contracts/libs/crypto/ECDSA512.sol b/contracts/libs/crypto/ECDSA512.sol
index ff70140c..3260142e 100644
--- a/contracts/libs/crypto/ECDSA512.sol
+++ b/contracts/libs/crypto/ECDSA512.sol
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {call, uint512} from "../bn/U512.sol";
+import {call512, uint512} from "../bn/U512.sol";
 import {U512} from "../bn/U512.sol";
 import {MemoryUtils} from "../utils/MemoryUtils.sol";
 
@@ -78,7 +78,7 @@ library ECDSA512 {
                 lowSmax: U512.fromBytes(curveParams_.lowSmax)
             });
 
-            call call_ = U512.initCall();
+            call512 call_ = U512.initCall();
 
             /// accept s only from the lower part of the curve
             if (
@@ -138,7 +138,7 @@ library ECDSA512 {
      * @dev Check if a point in affine coordinates is on the curve.
      */
     function _isOnCurve(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 a_,
         uint512 b_,
@@ -169,7 +169,7 @@ library ECDSA512 {
      * @dev Compute the Strauss-Shamir double scalar multiplication scalar1*G + scalar2*H.
      */
     function _doubleScalarMultiplication(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
@@ -268,7 +268,7 @@ library ECDSA512 {
      * @dev Double an elliptic curve point in affine coordinates.
      */
     function _twiceAffine(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
@@ -306,7 +306,7 @@ library ECDSA512 {
      * @dev Add two elliptic curve points in affine coordinates.
      */
     function _addAffine(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
@@ -352,7 +352,7 @@ library ECDSA512 {
     }
 
     function _precomputePointsTable(
-        call call_,
+        call512 call_,
         uint512 p_,
         uint512 two_,
         uint512 three_,
diff --git a/contracts/mock/libs/bn/U512Mock.sol b/contracts/mock/libs/bn/U512Mock.sol
index f10aa348..9b9a4ef8 100644
--- a/contracts/mock/libs/bn/U512Mock.sol
+++ b/contracts/mock/libs/bn/U512Mock.sol
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: MIT
 pragma solidity ^0.8.4;
 
-import {call} from "../../../libs/bn/U512.sol";
+import {call512} from "../../../libs/bn/U512.sol";
 import {uint512} from "../../../libs/bn/U512.sol";
 import {U512} from "../../../libs/bn/U512.sol";
 // import "hardhat/console.sol";
@@ -77,7 +77,7 @@ contract U512Mock {
         bytes memory aBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -104,7 +104,7 @@ contract U512Mock {
         bytes memory aBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -119,7 +119,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -134,7 +134,7 @@ contract U512Mock {
         bytes memory aBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -161,7 +161,7 @@ contract U512Mock {
         bytes memory aBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -176,7 +176,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -315,7 +315,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -346,7 +346,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -363,7 +363,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -380,7 +380,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -411,7 +411,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -428,7 +428,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -445,7 +445,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -476,7 +476,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -493,7 +493,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -510,7 +510,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -541,7 +541,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -558,7 +558,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -575,7 +575,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -606,7 +606,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -623,7 +623,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -640,7 +640,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -671,7 +671,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -688,7 +688,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -705,7 +705,7 @@ contract U512Mock {
         uint256 b_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -734,7 +734,7 @@ contract U512Mock {
         uint256 b_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -750,7 +750,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 m_ = U512.fromBytes(mBytes_);
@@ -766,7 +766,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -797,7 +797,7 @@ contract U512Mock {
         bytes memory bBytes_,
         bytes memory mBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);
@@ -814,7 +814,7 @@ contract U512Mock {
         bytes memory mBytes_,
         bytes memory toBytes_
     ) external view returns (bytes memory rBytes_) {
-        call call_ = U512.initCall();
+        call512 call_ = U512.initCall();
 
         uint512 a_ = U512.fromBytes(aBytes_);
         uint512 b_ = U512.fromBytes(bBytes_);

From d053dd7bab480c6084e3a047d2d5b790d2bf275b Mon Sep 17 00:00:00 2001
From: Artem Chystiakov <artem.ch31@gmail.com>
Date: Tue, 28 Jan 2025 13:24:55 +0200
Subject: [PATCH 42/42] update readme

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9505ef34..af367877 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 Solidity modules and utilities that **go far beyond mediocre solidity**.
 
 - Implementation of the [**Contracts Registry**](https://eips.ethereum.org/EIPS/eip-6224) pattern
-- State-of-the-art cryptography primitives (**ECDSA over 256-bit and 384-bit curves**, **RSASSA-PSS**)
+- State-of-the-art cryptography primitives (**ECDSA over 256-bit, 384-bit, and 512-bit curves**, **RSASSA-PSS**)
 - Advanced data structures (**Vector**, **DynamicSet**, **PriorityQueue**, **AVLTree**)
 - ZK-friendly [**Cartesian Merkle Tree**](https://medium.com/@Arvolear/cartesian-merkle-tree-the-new-breed-a30b005ecf27), [**Sparse Merkle Tree**](https://docs.iden3.io/publications/pdfs/Merkle-Tree.pdf), and [**Incremental Merkle Tree**](https://github.com/runtimeverification/deposit-contract-verification/blob/master/deposit-contract-verification.pdf) implementations
 - Versatile access control smart contracts (**Merkle whitelists**, **RBAC**)
@@ -18,6 +18,7 @@ Solidity modules and utilities that **go far beyond mediocre solidity**.
 - Flexible finance instruments (**Staking**, **Vesting**)
 - Robust UniswapV2 and UniswapV3 oracles
 - Lightweight SBT implementation
+- Hyperoptimized **uint512** BigInt library
 - Utilities to ease work with memory, types, ERC20 decimals, arrays, sets, and ZK proofs
 
 Built leveraging [OpenZeppelin Contracts](https://github.com/OpenZeppelin/openzeppelin-contracts) (4.9.6).