@@ -853,30 +853,65 @@ static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) {
853
853
: "S" (l ), "D" (a -> d )
854
854
: "rax" , "rdx" , "r8" , "r9" , "r10" , "r11" , "r12" , "r13" , "r14" , "cc" , "memory" );
855
855
#else
856
- /* 160 bit accumulator. */
857
- uint64_t c0 = 0 , c1 = 0 ;
858
- uint32_t c2 = 0 ;
859
856
860
- /* l[0..7] = a[0..3] * b[0..3]. */
861
- muladd_fast (a -> d [0 ], a -> d [0 ]);
862
- extract_fast (l [0 ]);
863
- muladd2 (a -> d [0 ], a -> d [1 ]);
864
- extract (l [1 ]);
865
- muladd2 (a -> d [0 ], a -> d [2 ]);
866
- muladd (a -> d [1 ], a -> d [1 ]);
867
- extract (l [2 ]);
868
- muladd2 (a -> d [0 ], a -> d [3 ]);
869
- muladd2 (a -> d [1 ], a -> d [2 ]);
870
- extract (l [3 ]);
871
- muladd2 (a -> d [1 ], a -> d [3 ]);
872
- muladd (a -> d [2 ], a -> d [2 ]);
873
- extract (l [4 ]);
874
- muladd2 (a -> d [2 ], a -> d [3 ]);
875
- extract (l [5 ]);
876
- muladd_fast (a -> d [3 ], a -> d [3 ]);
877
- extract_fast (l [6 ]);
878
- VERIFY_CHECK (c1 == 0 );
879
- l [7 ] = c0 ;
857
+ const uint64_t * d = & a -> d [0 ];
858
+ uint64_t d0 = d [0 ], d1 = d [1 ], d2 = d [2 ], d3 = d [3 ];
859
+
860
+ uint128_t c , u , v ;
861
+ uint64_t w ;
862
+
863
+ c = (uint128_t )d0 * d0 ;
864
+ l [0 ] = (uint64_t )c ; c >>= 64 ;
865
+
866
+ u = (uint128_t )d0 * d1 ;
867
+ w = (uint64_t )u ; u >>= 64 ;
868
+ c += w ;
869
+ c += w ;
870
+ l [1 ] = (uint64_t )c ; c >>= 64 ;
871
+
872
+ v = (uint128_t )d1 * d1 ;
873
+ c += (uint64_t )v ; v >>= 64 ;
874
+ u += (uint128_t )d0 * d2 ;
875
+ w = (uint64_t )u ; u >>= 64 ;
876
+ c += w ;
877
+ c += w ;
878
+ l [2 ] = (uint64_t )c ; c >>= 64 ;
879
+
880
+ c += (uint64_t )v ;
881
+ v = (uint128_t )d1 * d2 ;
882
+ u += (uint128_t )d0 * d3 ;
883
+ u += (uint64_t )v ; v >>= 64 ;
884
+ w = (uint64_t )u ; u >>= 64 ;
885
+ c += w ;
886
+ c += w ;
887
+ l [3 ] = (uint64_t )c ; c >>= 64 ;
888
+
889
+ u += (uint64_t )v ;
890
+ v = (uint128_t )d2 * d2 ;
891
+ c += (uint64_t )v ; v >>= 64 ;
892
+ u += (uint128_t )d1 * d3 ;
893
+ w = (uint64_t )u ; u >>= 64 ;
894
+ c += w ;
895
+ c += w ;
896
+ l [4 ] = (uint64_t )c ; c >>= 64 ;
897
+
898
+ c += (uint64_t )v ;
899
+ u += (uint128_t )d2 * d3 ;
900
+ w = (uint64_t )u ; u >>= 64 ;
901
+ c += w ;
902
+ c += w ;
903
+ l [5 ] = (uint64_t )c ; c >>= 64 ;
904
+
905
+ v = (uint128_t )d3 * d3 ;
906
+ c += (uint64_t )v ; v >>= 64 ;
907
+ w = (uint64_t )u ;
908
+ c += w ;
909
+ c += w ;
910
+ l [6 ] = (uint64_t )c ; c >>= 64 ;
911
+
912
+ VERIFY_CHECK (((v + c ) >> 64 ) == 0 );
913
+ l [7 ] = (uint64_t )v + (uint64_t )c ;
914
+
880
915
#endif
881
916
}
882
917
0 commit comments