-
Notifications
You must be signed in to change notification settings - Fork 99
implement conversion between half and single precision for denormals #1927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
709d320
39e76b1
e43238b
05f7e86
3af5540
fbc7b3b
312ef7e
fc1ad57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -415,8 +415,42 @@ class alignas(std::uint16_t) half { | |||||||||||||
if (f16_traits::is_inf(exp)) { | ||||||||||||||
return conv::shift_sign(data_) | exp; | ||||||||||||||
} else if (f16_traits::is_denom(exp)) { | ||||||||||||||
// TODO: handle denormals | ||||||||||||||
return conv::shift_sign(data_); | ||||||||||||||
// gap to fp16 denormal exponents (+1 from normal to denormal | ||||||||||||||
// exponent base) | ||||||||||||||
const auto gap_to_fp16 = | ||||||||||||||
(conv::bias_change - ((data_ & f32_traits::exponent_mask) >> | ||||||||||||||
conv::significand_offset) >> | ||||||||||||||
f16_traits::significand_bits) + | ||||||||||||||
emre-safa marked this conversation as resolved.
Show resolved
Hide resolved
Comment on lines
+421
to
+423
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
to make the evaluation order explicit and avoid a compiler warning |
||||||||||||||
1; | ||||||||||||||
|
||||||||||||||
// get the tail length which will be rounding | ||||||||||||||
const auto tail_len = gap_to_fp16 + conv::significand_offset; | ||||||||||||||
|
||||||||||||||
if (tail_len > f32_traits::significand_bits + 1) { | ||||||||||||||
return conv::shift_sign(data_); | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
emre-safa marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
// all significant (including implicitly leading 1) will be | ||||||||||||||
// moved after representation field more than one digit (less | ||||||||||||||
// than half) such that it will rounding to zero. | ||||||||||||||
const auto explicit_significand = | ||||||||||||||
(data_ & f32_traits::significand_mask) | | ||||||||||||||
(1 << f32_traits::significand_bits); | ||||||||||||||
|
||||||||||||||
const auto tail = | ||||||||||||||
explicit_significand & | ||||||||||||||
static_cast<f32_traits::bits_type>((1 << tail_len) - 1); | ||||||||||||||
|
||||||||||||||
auto new_significand = explicit_significand >> tail_len; | ||||||||||||||
|
||||||||||||||
const auto result = | ||||||||||||||
conv::shift_sign(data_) | exp | new_significand; | ||||||||||||||
|
||||||||||||||
const auto half = | ||||||||||||||
static_cast<f32_traits::bits_type>(1 << (tail_len - 1)); | ||||||||||||||
|
||||||||||||||
return result + | ||||||||||||||
(tail > half || ((tail == half) && (result & 1))); | ||||||||||||||
} else { | ||||||||||||||
// Rounding to even | ||||||||||||||
const auto result = conv::shift_sign(data_) | exp | | ||||||||||||||
|
@@ -442,8 +476,42 @@ class alignas(std::uint16_t) half { | |||||||||||||
return conv::shift_sign(data_) | f32_traits::exponent_mask | | ||||||||||||||
f32_traits::significand_mask; | ||||||||||||||
} else if (f16_traits::is_denom(data_)) { | ||||||||||||||
// TODO: handle denormals | ||||||||||||||
return conv::shift_sign(data_); | ||||||||||||||
if (!(data_ & f16_traits::significand_mask)) { | ||||||||||||||
return conv::shift_sign(data_); | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
int leading_zeros{}; | ||||||||||||||
|
||||||||||||||
// Counts leading zeros in the significand to determine the | ||||||||||||||
// normalization shift | ||||||||||||||
#if defined(_MSC_VER) | ||||||||||||||
unsigned long index{}; | ||||||||||||||
_BitScanReverse(&index, static_cast<std::uint32_t>( | ||||||||||||||
f16_traits::significand_mask & data_)); | ||||||||||||||
|
||||||||||||||
leading_zeros = f16_traits::significand_bits - index - 1; | ||||||||||||||
#else | ||||||||||||||
leading_zeros = | ||||||||||||||
__builtin_clz(static_cast<std::uint32_t>( | ||||||||||||||
f16_traits::significand_mask & data_)) - | ||||||||||||||
f16_traits::exponent_bits - f16_traits::sign_bits - | ||||||||||||||
8 * (sizeof(conv::result_bits) - sizeof(conv::source_bits)); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
this is a predefined macro from <climits> |
||||||||||||||
#endif | ||||||||||||||
|
||||||||||||||
// Computes the new exponent, 0xxxxxxxx000...00 | ||||||||||||||
auto new_exponent = | ||||||||||||||
((conv::bias_change >> f32_traits::significand_bits) - | ||||||||||||||
leading_zeros) | ||||||||||||||
<< f32_traits::significand_bits; | ||||||||||||||
Comment on lines
+502
to
+505
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
It is also worth noting that leading_zeros is always less than the bias change because we are converting half -> float |
||||||||||||||
|
||||||||||||||
// Shifts the original significand to normalize it, remove the | ||||||||||||||
// implicit '1', and align it in the new 23-bit field | ||||||||||||||
auto new_significand = | ||||||||||||||
(static_cast<f32_traits::bits_type>(data_) | ||||||||||||||
<< (conv::significand_offset + leading_zeros + 1)) & | ||||||||||||||
f32_traits::significand_mask; | ||||||||||||||
|
||||||||||||||
return conv::shift_sign(data_) | new_exponent | new_significand; | ||||||||||||||
} else { | ||||||||||||||
return conv::shift_sign(data_) | conv::shift_exponent(data_) | | ||||||||||||||
conv::shift_significand(data_); | ||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add another test based on the rounds-down-to-even case, with one more bit set at the very end, to ensure we compare the full tail before the shift.