@@ -1175,8 +1175,112 @@ bi_fintrinsic(div,div_float)
1175
1175
bi_fintrinsic (frem ,rem_float )
1176
1176
1177
1177
// ternary operators //
1178
+ // runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference.
1179
+ #if defined(_OS_WINDOWS_ )
1180
+ // reinterpret(UInt64, ::Float64)
1181
+ uint64_t bitcast_d2u (double d ) {
1182
+ uint64_t r ;
1183
+ memcpy (& r , & d , 8 );
1184
+ return r ;
1185
+ }
1186
+ // reinterpret(Float64, ::UInt64)
1187
+ double bitcast_u2d (uint64_t d ) {
1188
+ double r ;
1189
+ memcpy (& r , & d , 8 );
1190
+ return r ;
1191
+ }
1192
+ // Base.splitbits(::Float64)
1193
+ void splitbits (double * hi , double * lo , double d ) {
1194
+ * hi = bitcast_u2d (bitcast_d2u (d ) & 0xfffffffff8000000 );
1195
+ * lo = d - * hi ;
1196
+ }
1197
+ // Base.exponent(::Float64)
1198
+ int exponent (double a ) {
1199
+ int e ;
1200
+ frexp (a , & e );
1201
+ return e - 1 ;
1202
+ }
1203
+ // Base.fma_emulated(::Float32, ::Float32, ::Float32)
1204
+ float julia_fmaf (float a , float b , float c ) {
1205
+ double ab , res ;
1206
+ ab = (double )a * b ;
1207
+ res = ab + (double )c ;
1208
+ if ((bitcast_d2u (res ) & 0x1fffffff ) == 0x10000000 ){
1209
+ double reslo = fabsf (c ) > fabs (ab ) ? ab - (res - c ) : c - (res - ab );
1210
+ if (reslo != 0 )
1211
+ res = nextafter (res , copysign (1.0 /0.0 , reslo ));
1212
+ }
1213
+ return (float )res ;
1214
+ }
1215
+ // Base.twomul(::Float64, ::Float64)
1216
+ void two_mul (double * abhi , double * ablo , double a , double b ) {
1217
+ double ahi , alo , bhi , blo , blohi , blolo ;
1218
+ splitbits (& ahi , & alo , a );
1219
+ splitbits (& bhi , & blo , b );
1220
+ splitbits (& blohi , & blolo , blo );
1221
+ * abhi = a * b ;
1222
+ * ablo = alo * blohi - (((* abhi - ahi * bhi ) - alo * bhi ) - ahi * blo ) + blolo * alo ;
1223
+ }
1224
+ // Base.issubnormal(::Float64) (Win32's fpclassify seems broken)
1225
+ int issubnormal (double d ) {
1226
+ uint64_t y = bitcast_d2u (d );
1227
+ return ((y & 0x7ff0000000000000 ) == 0 ) & ((y & 0x000fffffffffffff ) != 0 );
1228
+ }
1229
+ #if defined(_WIN32 )
1230
+ // Win32 needs volatile (avoid over optimization?)
1231
+ #define VDOUBLE volatile double
1232
+ #else
1233
+ #define VDOUBLE double
1234
+ #endif
1235
+
1236
+ // Base.fma_emulated(::Float64, ::Float64, ::Float64)
1237
+ double julia_fma (double a , double b , double c ) {
1238
+ double abhi , ablo , r , s ;
1239
+ two_mul (& abhi , & ablo , a , b );
1240
+ if (!isfinite (abhi + c ) || fabs (abhi ) < 2.0041683600089732e-292 ||
1241
+ issubnormal (a ) || issubnormal (b )) {
1242
+ int aandbfinite = isfinite (a ) && isfinite (b );
1243
+ if (!(aandbfinite && isfinite (c )))
1244
+ return aandbfinite ? c : abhi + c ;
1245
+ if (a == 0 || b == 0 )
1246
+ return abhi + c ;
1247
+ int bias = exponent (a ) + exponent (b );
1248
+ VDOUBLE c_denorm = ldexp (c , - bias );
1249
+ if (isfinite (c_denorm )) {
1250
+ if (issubnormal (a ))
1251
+ a *= 4.503599627370496e15 ;
1252
+ if (issubnormal (b ))
1253
+ b *= 4.503599627370496e15 ;
1254
+ a = bitcast_u2d ((bitcast_d2u (a ) & 0x800fffffffffffff ) | 0x3ff0000000000000 );
1255
+ b = bitcast_u2d ((bitcast_d2u (b ) & 0x800fffffffffffff ) | 0x3ff0000000000000 );
1256
+ c = c_denorm ;
1257
+ two_mul (& abhi , & ablo , a , b );
1258
+ r = abhi + c ;
1259
+ s = (fabs (abhi ) > fabs (c )) ? (abhi - r + c + ablo ) : (c - r + abhi + ablo );
1260
+ double sumhi = r + s ;
1261
+ if (issubnormal (ldexp (sumhi , bias ))) {
1262
+ double sumlo = r - sumhi + s ;
1263
+ int bits_lost = - bias - exponent (sumhi )- 1022 ;
1264
+ if ((bits_lost != 1 ) ^ ((bitcast_d2u (sumhi )& 1 ) == 1 ))
1265
+ if (sumlo != 0 )
1266
+ sumhi = nextafter (sumhi , copysign (1.0 /0.0 , sumlo ));
1267
+ }
1268
+ return ldexp (sumhi , bias );
1269
+ }
1270
+ if (isinf (abhi ) && signbit (c ) == signbit (a * b ))
1271
+ return abhi ;
1272
+ }
1273
+ r = abhi + c ;
1274
+ s = (fabs (abhi ) > fabs (c )) ? (abhi - r + c + ablo ) : (c - r + abhi + ablo );
1275
+ return r + s ;
1276
+ }
1277
+ #define fma (a , b , c ) \
1278
+ sizeof(a) == sizeof(float) ? julia_fmaf(a, b, c) : julia_fma(a, b, c)
1279
+ #else // On other systems use fma(f) directly
1178
1280
#define fma (a , b , c ) \
1179
1281
sizeof(a) == sizeof(float) ? fmaf(a, b, c) : fma(a, b, c)
1282
+ #endif
1283
+
1180
1284
#define muladd (a , b , c ) a * b + c
1181
1285
ter_fintrinsic (fma ,fma_float )
1182
1286
ter_fintrinsic (muladd ,muladd_float )
0 commit comments