Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit c4a1e83

Browse files
committed
docs
1 parent 6122d6a commit c4a1e83

File tree

1 file changed

+20
-33
lines changed

1 file changed

+20
-33
lines changed

src/math/generic/scalbn.rs

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,5 @@
1-
#![allow(unused)]
2-
#![allow(clippy::all)]
3-
4-
use super::super::support::Hexf;
51
use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
62

7-
#[cfg(not(optimizations_enabled))]
8-
extern crate std;
9-
#[cfg(not(optimizations_enabled))]
10-
use std::dbg;
11-
12-
#[cfg(optimizations_enabled)]
13-
macro_rules! dbg {
14-
($($tt:tt)*) => {};
15-
}
16-
173
/// Scale the exponent.
184
///
195
/// From N3220:
@@ -36,7 +22,6 @@ where
3622
F::Int: CastFrom<i32>,
3723
F::Int: CastFrom<u32>,
3824
{
39-
dbg!();
4025
let zero = IntTy::<F>::ZERO;
4126

4227
// Bits including the implicit bit
@@ -55,10 +40,6 @@ where
5540
// 2 ^ sig_total_bits, representation of what can be accounted for with subnormals
5641
let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
5742

58-
dbg!(exp_max, exp_min, sig_total_bits, sig_total_bits + F::EXP_BIAS);
59-
dbg!(Hexf(f_exp_max), Hexf(f_exp_min), Hexf(f_exp_subnorm));
60-
dbg!(Hexf(x), n);
61-
6243
// The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
6344
// where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
6445
// `n = -EMin + 2`. To get around this, reduce the magnitude of the final scale operation by
@@ -86,56 +67,62 @@ where
8667

8768
let mul = f_exp_min * f_exp_subnorm;
8869
let add = -exp_min - sig_total_bits as i32;
89-
dbg!(Hexf(mul), add);
70+
71+
// Worst case negative `n`: `x` is the maximum positive value, the result is `F::MIN`.
72+
// This can be reached by three scaling multiplications (two here and one final).
73+
debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);
9074

9175
x *= mul;
9276
n += add;
9377

9478
if n < exp_min {
9579
x *= mul;
9680
n += add;
81+
9782
if n < exp_min {
9883
x *= mul;
9984
n += add;
85+
10086
if n < exp_min {
10187
n = exp_min;
102-
dbg!(Hexf(x), n);
10388
}
10489
}
10590
}
10691
} else {
107-
let add = (n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
108-
let mul = F::from_parts(false, (F::EXP_BIAS as i32 + add) as u32, zero);
109-
let add = -add;
110-
dbg!(add, Hexf(mul));
92+
// `f16` is unique compared to other float types in that the difference between the
93+
// minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
94+
// small, only three. The above method depends on decrementing `n` by `add` two times;
95+
// for other float types this works out because `add` is a substantial fraction of
96+
// the exponent range. For `f16`, however, 3 is relatively small compared to the
97+
// exponent range (which is 39), so that would require a lot of rounds.
98+
//
99+
// Work around this by using a different algorithm that scales by the max possible
100+
// value that does not exceed the minimum normal exponent.
101+
102+
let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
103+
let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
111104

112105
x *= mul;
113106
n += add;
114-
dbg!(Hexf(x), n);
115107

116108
if n < exp_min {
117109
let add = (n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
118110
let mul = F::from_parts(false, (F::EXP_BIAS as i32 + add) as u32, zero);
119111
let add = -add;
120-
dbg!(add, Hexf(mul));
121112

122113
x *= mul;
123114
n += add;
124-
dbg!(Hexf(x), n);
125115

126116
if n < exp_min {
127117
n = exp_min;
128-
dbg!(Hexf(x), n);
129118
}
130119
}
131120
// f16
132121
}
133122
}
134-
dbg!(Hexf(x), n);
123+
135124
let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
136-
let ret = x * scale;
137-
dbg!(Hexf(scale), Hexf(ret));
138-
ret
125+
x * scale
139126
}
140127

141128
#[cfg(test)]

0 commit comments

Comments
 (0)