Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit d655126

Browse files
committed
Start converting fma to a generic function
This is the first step toward making `fma` usable for `f128`, and possibly `f32` on platforms where growing to `f64` is not fast. This does not yet work for anything other than `f64`.
1 parent 0492f2e commit d655126

File tree

6 files changed

+278
-192
lines changed

6 files changed

+278
-192
lines changed

etc/function-definitions.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,13 +344,15 @@
344344
},
345345
"fma": {
346346
"sources": [
347-
"src/math/fma.rs"
347+
"src/math/fma.rs",
348+
"src/math/generic/fma.rs"
348349
],
349350
"type": "f64"
350351
},
351352
"fmaf": {
352353
"sources": [
353-
"src/math/fmaf.rs"
354+
"src/math/fmaf.rs",
355+
"src/math/generic/fma.rs"
354356
],
355357
"type": "f32"
356358
},

src/math/fma.rs

Lines changed: 3 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -1,195 +1,9 @@
1-
use core::{f32, f64};
2-
3-
use super::scalbn;
4-
5-
const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
6-
7-
struct Num {
8-
m: u64,
9-
e: i32,
10-
sign: i32,
11-
}
12-
13-
fn normalize(x: f64) -> Num {
14-
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
15-
16-
let mut ix: u64 = x.to_bits();
17-
let mut e: i32 = (ix >> 52) as i32;
18-
let sign: i32 = e & 0x800;
19-
e &= 0x7ff;
20-
if e == 0 {
21-
ix = (x * x1p63).to_bits();
22-
e = (ix >> 52) as i32 & 0x7ff;
23-
e = if e != 0 { e - 63 } else { 0x800 };
24-
}
25-
ix &= (1 << 52) - 1;
26-
ix |= 1 << 52;
27-
ix <<= 1;
28-
e -= 0x3ff + 52 + 1;
29-
Num { m: ix, e, sign }
30-
}
31-
32-
#[inline]
33-
fn mul(x: u64, y: u64) -> (u64, u64) {
34-
let t = (x as u128).wrapping_mul(y as u128);
35-
((t >> 64) as u64, t as u64)
36-
}
37-
38-
/// Floating multiply add (f64)
1+
/// Fused multiply add (f64)
392
///
40-
/// Computes `(x*y)+z`, rounded as one ternary operation:
41-
/// Computes the value (as if) to infinite precision and rounds once to the result format,
42-
/// according to the rounding mode characterized by the value of FLT_ROUNDS.
3+
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
434
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
445
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
45-
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
46-
let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
47-
48-
/* normalize so top 10bits and last bit are 0 */
49-
let nx = normalize(x);
50-
let ny = normalize(y);
51-
let nz = normalize(z);
52-
53-
if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
54-
return x * y + z;
55-
}
56-
if nz.e >= ZEROINFNAN {
57-
if nz.e > ZEROINFNAN {
58-
/* z==0 */
59-
return x * y + z;
60-
}
61-
return z;
62-
}
63-
64-
/* mul: r = x*y */
65-
let zhi: u64;
66-
let zlo: u64;
67-
let (mut rhi, mut rlo) = mul(nx.m, ny.m);
68-
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
69-
70-
/* align exponents */
71-
let mut e: i32 = nx.e + ny.e;
72-
let mut d: i32 = nz.e - e;
73-
/* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
74-
if d > 0 {
75-
if d < 64 {
76-
zlo = nz.m << d;
77-
zhi = nz.m >> (64 - d);
78-
} else {
79-
zlo = 0;
80-
zhi = nz.m;
81-
e = nz.e - 64;
82-
d -= 64;
83-
if d == 0 {
84-
} else if d < 64 {
85-
rlo = (rhi << (64 - d)) | (rlo >> d) | ((rlo << (64 - d)) != 0) as u64;
86-
rhi = rhi >> d;
87-
} else {
88-
rlo = 1;
89-
rhi = 0;
90-
}
91-
}
92-
} else {
93-
zhi = 0;
94-
d = -d;
95-
if d == 0 {
96-
zlo = nz.m;
97-
} else if d < 64 {
98-
zlo = (nz.m >> d) | ((nz.m << (64 - d)) != 0) as u64;
99-
} else {
100-
zlo = 1;
101-
}
102-
}
103-
104-
/* add */
105-
let mut sign: i32 = nx.sign ^ ny.sign;
106-
let samesign: bool = (sign ^ nz.sign) == 0;
107-
let mut nonzero: i32 = 1;
108-
if samesign {
109-
/* r += z */
110-
rlo = rlo.wrapping_add(zlo);
111-
rhi += zhi + (rlo < zlo) as u64;
112-
} else {
113-
/* r -= z */
114-
let (res, borrow) = rlo.overflowing_sub(zlo);
115-
rlo = res;
116-
rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64));
117-
if (rhi >> 63) != 0 {
118-
rlo = (rlo as i64).wrapping_neg() as u64;
119-
rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64;
120-
sign = (sign == 0) as i32;
121-
}
122-
nonzero = (rhi != 0) as i32;
123-
}
124-
125-
/* set rhi to top 63bit of the result (last bit is sticky) */
126-
if nonzero != 0 {
127-
e += 64;
128-
d = rhi.leading_zeros() as i32 - 1;
129-
/* note: d > 0 */
130-
rhi = (rhi << d) | (rlo >> (64 - d)) | ((rlo << d) != 0) as u64;
131-
} else if rlo != 0 {
132-
d = rlo.leading_zeros() as i32 - 1;
133-
if d < 0 {
134-
rhi = (rlo >> 1) | (rlo & 1);
135-
} else {
136-
rhi = rlo << d;
137-
}
138-
} else {
139-
/* exact +-0 */
140-
return x * y + z;
141-
}
142-
e -= d;
143-
144-
/* convert to double */
145-
let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
146-
if sign != 0 {
147-
i = -i;
148-
}
149-
let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
150-
151-
if e < -1022 - 62 {
152-
/* result is subnormal before rounding */
153-
if e == -1022 - 63 {
154-
let mut c: f64 = x1p63;
155-
if sign != 0 {
156-
c = -c;
157-
}
158-
if r == c {
159-
/* min normal after rounding, underflow depends
160-
on arch behaviour which can be imitated by
161-
a double to float conversion */
162-
let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
163-
return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
164-
}
165-
/* one bit is lost when scaled, add another top bit to
166-
only round once at conversion if it is inexact */
167-
if (rhi << 53) != 0 {
168-
i = ((rhi >> 1) | (rhi & 1) | (1 << 62)) as i64;
169-
if sign != 0 {
170-
i = -i;
171-
}
172-
r = i as f64;
173-
r = 2. * r - c; /* remove top bit */
174-
175-
/* raise underflow portably, such that it
176-
cannot be optimized away */
177-
{
178-
let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
179-
r += (tiny * tiny) * (r - r);
180-
}
181-
}
182-
} else {
183-
/* only round once when scaled */
184-
d = 10;
185-
i = (((rhi >> d) | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
186-
if sign != 0 {
187-
i = -i;
188-
}
189-
r = i as f64;
190-
}
191-
}
192-
scalbn(r, e)
6+
return super::generic::fma(x, y, z);
1937
}
1948

1959
#[cfg(test)]

0 commit comments

Comments
 (0)