Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 9d192d9

Browse files
committed
Initial coarse pass
1 parent 8f2a9ae commit 9d192d9

File tree

4 files changed

+166
-96
lines changed

4 files changed

+166
-96
lines changed

crates/libm-test/src/f8_impl.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ impl Float for f8 {
3939
const NEG_PI: Self = Self::ZERO;
4040
const FRAC_PI_2: Self = Self::ZERO;
4141

42+
/// `2^sig_bits`
43+
const TWO_POW_SIG_BITS: Self =
44+
Self(((Self::SIG_BITS + Self::EXP_BIAS) as Self::Int) << Self::SIG_BITS);
45+
/// `2^-sig_bits`
46+
const TWO_POW_NEG_SIG_BITS: Self =
47+
Self(((-(Self::SIG_BITS as i32) + Self::EXP_BIAS as i32) as Self::Int) << Self::SIG_BITS);
48+
4249
const BITS: u32 = 8;
4350
const SIG_BITS: u32 = 3;
4451
const SIGN_MASK: Self::Int = 0b1_0000_000;

etc/update-api-list.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
# These files do not trigger a retest.
2727
IGNORED_SOURCES = ["src/libm_helper.rs"]
28+
# Same as above, limited to specific functions
29+
IGNORED_SOURCES_MAP = {"fma": ["src/math/cbrt.rs"]}
2830

2931
IndexTy: TypeAlias = dict[str, dict[str, Any]]
3032
"""Type of the `index` item in rustdoc's JSON output"""
@@ -138,6 +140,8 @@ def _init_defs(self, index: IndexTy) -> None:
138140

139141
for src in IGNORED_SOURCES:
140142
sources.discard(src)
143+
for src in IGNORED_SOURCES_MAP.get(name, []):
144+
sources.discard(src)
141145

142146
# Sort the set
143147
self.defs = {k: sorted(v) for (k, v) in defs.items()}

src/math/cbrt.rs

Lines changed: 141 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -5,208 +5,253 @@
55
*/
66

77
use super::Float;
8-
use super::support::{FpResult, Round, cold_path};
8+
use super::support::{CastFrom, FpResult, Int, MinInt, Round, cold_path};
99

1010
/// Compute the cube root of the argument.
1111
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
1212
pub fn cbrt(x: f64) -> f64 {
1313
cbrt_round(x, Round::Nearest).val
1414
}
1515

16-
pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
17-
const ESCALE: [f64; 3] = [
18-
1.0,
19-
hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
20-
hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */
21-
];
22-
23-
/* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2]
24-
with maximal error < 9.2e-5 (attained at x=2) */
25-
const C: [f64; 4] = [
26-
hf64!("0x1.1b0babccfef9cp-1"),
27-
hf64!("0x1.2c9a3e94d1da5p-1"),
28-
hf64!("-0x1.4dc30b1a1ddbap-3"),
29-
hf64!("0x1.7a8d3e4ec9b07p-6"),
30-
];
31-
32-
let u0: f64 = hf64!("0x1.5555555555555p-2");
33-
let u1: f64 = hf64!("0x1.c71c71c71c71cp-3");
34-
35-
let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
36-
37-
let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
16+
pub fn cbrt_round<F: Float + CbrtHelper>(x: F, round: Round) -> FpResult<F>
17+
where
18+
F::Int: CastFrom<u64>,
19+
F::Int: CastFrom<u32>,
20+
F::Int: From<u8>,
21+
{
22+
let zero = F::Int::ZERO;
23+
let one = F::Int::ONE;
24+
let u0: F = F::U0;
25+
let u1: F = F::U1;
26+
let rsc = F::RSC;
27+
let off = F::OFF;
3828

3929
/* rm=0 for rounding to nearest, and other values for directed roundings */
40-
let hx: u64 = x.to_bits();
41-
let mut mant: u64 = hx & f64::SIG_MASK;
42-
let sign: u64 = hx >> 63;
30+
let hx = x.to_bits();
31+
let mut mant: F::Int = hx & F::SIG_MASK;
32+
let sign: F::Int = hx >> (F::BITS - 1);
33+
let neg = x.is_sign_negative();
4334

44-
let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT;
35+
let mut e: u32 = x.exp();
4536

46-
if ((e + 1) & f64::EXP_SAT) < 2 {
37+
if ((e + 1) & F::EXP_SAT) < 2 {
4738
cold_path();
4839

49-
let ix: u64 = hx & !f64::SIGN_MASK;
40+
let ix = hx & !F::SIGN_MASK;
5041

5142
/* 0, inf, nan: we return x + x instead of simply x,
5243
so that for x a signaling NaN, it correctly triggers
5344
the invalid exception. */
54-
if e == f64::EXP_SAT || ix == 0 {
45+
if e == F::EXP_SAT || ix == zero {
5546
return FpResult::ok(x + x);
5647
}
5748

5849
let nz = ix.leading_zeros() - 11; /* subnormal */
5950
mant <<= nz;
60-
mant &= f64::SIG_MASK;
51+
mant &= F::SIG_MASK;
6152
e = e.wrapping_sub(nz - 1);
6253
}
6354

6455
e = e.wrapping_add(3072);
65-
let cvt1: u64 = mant | (0x3ffu64 << 52);
66-
let mut cvt5: u64 = cvt1;
56+
let cvt1 = mant | (F::Int::cast_from(F::EXP_BIAS) << F::SIG_BITS);
57+
let mut cvt5 = cvt1;
6758

6859
let et: u32 = e / 3;
6960
let it: u32 = e % 3;
7061

7162
/* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 2 */
72-
cvt5 += u64::from(it) << f64::SIG_BITS;
73-
cvt5 |= sign << 63;
74-
let zz: f64 = f64::from_bits(cvt5);
63+
cvt5 += F::Int::cast_from(it) << F::SIG_BITS;
64+
cvt5 |= sign << (F::BITS - 1);
65+
let zz: F = F::from_bits(cvt5);
7566

7667
/* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
77-
let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index
78-
isc |= sign << 63;
79-
let cvt2: u64 = isc;
80-
let z: f64 = f64::from_bits(cvt1);
68+
let mut isc = F::ESCALE[it as usize].to_bits(); // todo: index
69+
isc |= sign << (F::BITS - 1);
70+
let cvt2 = isc;
71+
let z: F = F::from_bits(cvt1);
8172

8273
/* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
8374
and 1 <= z < 2 */
84-
let r: f64 = 1.0 / z;
85-
let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize];
86-
let z2: f64 = z * z;
87-
let c0: f64 = C[0] + z * C[1];
88-
let c2: f64 = C[2] + z * C[3];
89-
let mut y: f64 = c0 + z2 * c2;
90-
let mut y2: f64 = y * y;
75+
let r: F = F::ONE / z;
76+
let rr: F = r * rsc[((it as usize) << 1) | neg as usize];
77+
let z2: F = z * z;
78+
let c0: F = F::C[0] + z * F::C[1];
79+
let c2: F = F::C[2] + z * F::C[3];
80+
let mut y: F = c0 + z2 * c2;
81+
let mut y2: F = y * y;
9182

9283
/* y is an approximation of z^(1/3) */
93-
let mut h: f64 = y2 * (y * r) - 1.0;
84+
let mut h: F = y2 * (y * r) - F::ONE;
9485

9586
/* h determines the error between y and z^(1/3) */
9687
y -= (h * y) * (u0 - u1 * h);
9788

9889
/* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
9990
of Newton's method, with the function f(y) = 1-z/y^3. */
100-
y *= f64::from_bits(cvt2);
91+
y *= F::from_bits(cvt2);
10192

10293
/* Now y is an approximation of zz^(1/3),
10394
* and rr an approximation of 1/zz. We now perform another iteration of
10495
* Newton-Raphson, this time with a linear approximation only. */
10596
y2 = y * y;
106-
let mut y2l: f64 = fmaf64(y, y, -y2);
97+
let mut y2l: F = y.fma(y, -y2);
10798

10899
/* y2 + y2l = y^2 exactly */
109-
let mut y3: f64 = y2 * y;
110-
let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l;
100+
let mut y3: F = y2 * y;
101+
let mut y3l: F = y.fma(y2, -y3) + y * y2l;
111102

112103
/* y3 + y3l approximates y^3 with about 106 bits of accuracy */
113104
h = ((y3 - zz) + y3l) * rr;
114-
let mut dy: f64 = h * (y * u0);
105+
let mut dy: F = h * (y * u0);
115106

116107
/* the approximation of zz^(1/3) is y - dy */
117-
let mut y1: f64 = y - dy;
108+
let mut y1: F = y - dy;
118109
dy = (y - y1) - dy;
119110

120111
/* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y)
121112
* (for rounding to nearest) */
122-
let mut ady: f64 = dy.abs();
113+
let mut ady: F = dy.abs();
123114

124115
/* For directed roundings, ady0 is tiny when |dy| is tiny, and ady1 is tiny
125116
* when |dy| is close to ulp(1);
126117
* for rounding to nearest, ady0 is tiny when |dy| is close to 1/2 ulp(1),
127118
* and ady1 is tiny when |dy| is close to 3/2 ulp(1). */
128-
let mut ady0: f64 = (ady - off[round as usize]).abs();
129-
let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
119+
let mut ady0: F = (ady - off[round as usize]).abs();
120+
let mut ady1: F = (ady - (F::TWO_POW_NEG_SIG_BITS + off[round as usize])).abs();
130121

131-
if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") {
122+
let magic = F::from_parts(false, (-75 + F::EXP_BIAS as i32) as u32, zero);
123+
124+
if ady0 < magic || ady1 < magic {
132125
cold_path();
133126

134127
y2 = y1 * y1;
135-
y2l = fmaf64(y1, y1, -y2);
128+
y2l = y1.fma(y1, -y2);
136129
y3 = y2 * y1;
137-
y3l = fmaf64(y1, y2, -y3) + y1 * y2l;
130+
y3l = y1.fma(y2, -y3) + y1 * y2l;
138131
h = ((y3 - zz) + y3l) * rr;
139132
dy = h * (y1 * u0);
140133
y = y1 - dy;
141134
dy = (y1 - y) - dy;
142135
y1 = y;
143136
ady = dy.abs();
144137
ady0 = (ady - off[round as usize]).abs();
145-
ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
138+
ady1 = (ady - (F::TWO_POW_NEG_SIG_BITS + off[round as usize])).abs();
146139

147-
if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") {
140+
let magic2 = F::from_parts(false, (-98 + F::EXP_BIAS as i32) as u32, zero);
141+
if ady0 < magic2 || ady1 < magic2 {
148142
cold_path();
149-
let azz: f64 = zz.abs();
143+
let azz: F = zz.abs();
150144

151145
// ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0
152-
if azz == hf64!("0x1.9b78223aa307cp+1") {
153-
y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz);
146+
if azz == F::AZMAGIC1 {
147+
y1 = F::AZMAGIC2.copysign(zz);
154148
}
155149

156150
// ~ 0x1.de87aa837820e80000000000001c0f08p+0
157-
if azz == hf64!("0x1.a202bfc89ddffp+2") {
158-
y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz);
151+
if azz == F::AZMAGIC3 {
152+
y1 = F::AZMAGIC4.copysign(zz);
159153
}
160154

161155
if round != Round::Nearest {
162-
let wlist = [
163-
(hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
164-
(hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
165-
(hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0
166-
(hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0
167-
(hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0
168-
(hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0
169-
(hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0
170-
];
171-
172-
for (a, b) in wlist {
156+
for (a, b) in F::WLIST {
173157
if azz == a {
174-
let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
158+
let tmp = if F::Int::from(round as u8) + sign == F::Int::cast_from(2) {
159+
F::TWO_POW_NEG_SIG_BITS
160+
} else {
161+
F::ZERO
162+
};
175163
y1 = (b + tmp).copysign(zz);
176164
}
177165
}
178166
}
179167
}
180168
}
181169

182-
let mut cvt3: u64 = y1.to_bits();
183-
cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52);
184-
let m0: u64 = cvt3 << 30;
170+
let mut cvt3 = y1.to_bits();
171+
cvt3 = cvt3.wrapping_add((F::Int::cast_from(et.wrapping_sub(342).wrapping_sub(1023))) << 52);
172+
let m0 = cvt3 << 30;
185173
let m1 = m0 >> 63;
186174

187-
if (m0 ^ m1) <= (1u64 << 30) {
175+
if (m0 ^ m1) <= (one << 30) {
188176
cold_path();
189177

190-
let mut cvt4: u64 = y1.to_bits();
191-
cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64;
178+
let mut cvt4 = y1.to_bits();
179+
cvt4 = (cvt4 + (F::Int::cast_from(164) << 15)) & F::Int::cast_from(0xffffffffffff0000u64);
192180

193-
if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 {
194-
cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64;
181+
let magic3 = F::from_parts(false, (-60 + F::EXP_BIAS as i32) as u32, zero);
182+
if ((F::from_bits(cvt4) - y1) - dy).abs() < magic3 || (zz).abs() == F::ONE {
183+
cvt3 = (cvt3 + (one << 15)) & F::Int::cast_from(0xffffffffffff0000u64);
195184
}
196185
}
197186

198-
FpResult::ok(f64::from_bits(cvt3))
187+
FpResult::ok(F::from_bits(cvt3))
199188
}
200189

201-
fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
202-
#[cfg(intrinsics_enabled)]
203-
{
204-
return unsafe { core::intrinsics::fmaf64(x, y, z) };
205-
}
190+
trait CbrtHelper: Float {
191+
/// 2^(n / 3) for n = [0, 1, 2]
192+
const ESCALE: [Self; 3];
193+
/// The polynomial `c0+c1*x+c2*x^2+c3*x^3` approximates `x^(1/3)` on `[1,2]`
194+
/// with maximal error < 9.2e-5 (attained at x=2)
195+
const C: [Self; 4];
196+
const U0: Self;
197+
const U1: Self;
198+
const RSC: [Self; 6];
199+
const OFF: [Self; 4];
200+
const WLIST: [(Self, Self); 7];
201+
const AZMAGIC1: Self;
202+
const AZMAGIC2: Self;
203+
const AZMAGIC3: Self;
204+
const AZMAGIC4: Self;
205+
fn fma(self, y: Self, z: Self) -> Self;
206+
}
206207

207-
#[cfg(not(intrinsics_enabled))]
208-
{
209-
return super::fma(x, y, z);
208+
impl CbrtHelper for f64 {
209+
const ESCALE: [Self; 3] = [
210+
1.0,
211+
hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
212+
hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */
213+
];
214+
215+
const C: [Self; 4] = [
216+
hf64!("0x1.1b0babccfef9cp-1"),
217+
hf64!("0x1.2c9a3e94d1da5p-1"),
218+
hf64!("-0x1.4dc30b1a1ddbap-3"),
219+
hf64!("0x1.7a8d3e4ec9b07p-6"),
220+
];
221+
222+
const U0: Self = hf64!("0x1.5555555555555p-2");
223+
224+
const U1: Self = hf64!("0x1.c71c71c71c71cp-3");
225+
226+
const RSC: [Self; 6] = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
227+
228+
const OFF: [Self; 4] = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
229+
230+
const WLIST: [(Self, Self); 7] = [
231+
(hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
232+
(hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
233+
(hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0
234+
(hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0
235+
(hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0
236+
(hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0
237+
(hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0
238+
];
239+
240+
const AZMAGIC1: Self = hf64!("0x1.9b78223aa307cp+1");
241+
const AZMAGIC2: Self = hf64!("0x1.79d15d0e8d59cp+0");
242+
const AZMAGIC3: Self = hf64!("0x1.a202bfc89ddffp+2");
243+
const AZMAGIC4: Self = hf64!("0x1.de87aa837820fp+0");
244+
245+
fn fma(self, y: Self, z: Self) -> Self {
246+
#[cfg(intrinsics_enabled)]
247+
{
248+
return unsafe { core::intrinsics::fmaf64(self, y, z) };
249+
}
250+
251+
#[cfg(not(intrinsics_enabled))]
252+
{
253+
return super::fma(self, y, z);
254+
}
210255
}
211256
}
212257

0 commit comments

Comments
 (0)