Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions core/test/base/half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,70 @@ TEST(FloatToHalf, TruncatesLargeNumberRoundToEven)
}


// The input 1.00011011...b * 2^-20 is about 17.69 * 2^-24. That is below the
// smallest normal half, so the result is the nearest half subnormal,
// 18 * 2^-24 (significand field 0000010010, exponent field all zero).
TEST(FloatToHalf, ConvertsRandomPositiveNumber)
{
    const auto expected = get_bits("0" "00000" "0000010010");

    half converted = create_from_bits<float>("0" "01101011" "00011011000001011110010");

    ASSERT_EQ(get_bits(converted), expected);
}


TEST(FloatToHalf, RoundsUpToEvenNumber)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add another test, derived from the rounds-down-to-even case, that sets one more bit at the very end of the significand, to ensure we compare the full tail before the shift.

{
half x = create_from_bits<float>("0" "01101001" "11100000000000000000000");

ASSERT_EQ(get_bits(x), get_bits("0" "00000" "0000001000"));
}


// The input is -1.110101b * 2^-19 = -58.5 * 2^-24, exactly halfway between
// the half subnormals 58 and 59 (in units of 2^-24). The tie must resolve to
// the even significand, 58 = 0000111010b, with the sign bit preserved.
TEST(FloatToHalf, RoundsDownToEvenNumber)
{
    const auto expected = get_bits("1" "00000" "0000111010");

    half converted = create_from_bits<float>("1" "01101100" "11010100000000000000000");

    ASSERT_EQ(get_bits(converted), expected);
}


// The input is exactly 2^-25, i.e. half of the smallest half subnormal
// (2^-24). This tie between 0 and 1 * 2^-24 must resolve to the even value,
// which is positive zero.
TEST(FloatToHalf, LargestNumberThatConvertsToZero)
{
    const auto expected = get_bits("0" "00000" "0000000000");

    half converted = create_from_bits<float>("0" "01100110" "00000000000000000000000");

    ASSERT_EQ(get_bits(converted), expected);
}


// The input is the next float above 2^-25 (lowest significand bit set), so it
// is strictly more than half of the smallest half subnormal and must round up
// to 1 * 2^-24 instead of down to zero.
TEST(FloatToHalf, SmallestNumberThatDoesntConvertToZero)
{
    const auto expected = get_bits("0" "00000" "0000000001");

    half converted = create_from_bits<float>("0" "01100110" "00000000000000000000001");

    ASSERT_EQ(get_bits(converted), expected);
}


// The input has biased exponent 77 (magnitude around 2^-50), far below half
// of the smallest half subnormal, so the conversion must flush it to +0.
TEST(FloatToHalf, RandomNumberThatConvertsToPositiveZero)
{
    const auto expected = get_bits("0" "00000" "0000000000");

    half converted = create_from_bits<float>("0" "01001101" "10010100101111001101010");

    ASSERT_EQ(get_bits(converted), expected);
}


// The input is negative with biased exponent 72 (magnitude around 2^-55), far
// below half of the smallest half subnormal. The result must be zero with the
// sign bit kept, i.e. -0.
TEST(FloatToHalf, RandomNumberThatConvertsToNegativeZero)
{
    const auto expected = get_bits("1" "00000" "0000000000");

    half converted = create_from_bits<float>("1" "01001000" "10000010110100001010001");

    ASSERT_EQ(get_bits(converted), expected);
}


// The input is 1.1111111111b * 2^-15 = 1023.5 * 2^-24, a tie between the
// largest half subnormal (1023) and 1024. Rounding to even yields 1024, which
// carries out of the significand field and produces the smallest normal half
// (exponent field 00001, significand field zero).
TEST(FloatToHalf, BoundaryTest)
{
    const auto expected = get_bits("0" "00001" "0000000000");

    half converted = create_from_bits<float>("0" "01110000" "11111111110000000000000");

    ASSERT_EQ(get_bits(converted), expected);
}


TEST(HalfToFloat, ConvertsOne)
{
float x = create_from_bits<half>("0" "01111" "0000000000");
Expand Down Expand Up @@ -195,4 +259,52 @@ TEST(HalfToFloat, ExtendsLargeNumber)
}


// The half subnormal 1110101100b * 2^-24 equals 1.110101100b * 2^-15. As a
// normalized float this is biased exponent 112 with the bits after the
// leading one left-aligned in the significand field.
TEST(HalfToFloat, ConvertsPositiveRandomDenormal)
{
    const auto expected = get_bits("0" "01110000" "11010110000000000000000");

    float widened = create_from_bits<half>("0" "00000" "1110101100");

    ASSERT_EQ(get_bits(widened), expected);
}


// The half subnormal -(0010111101b * 2^-24) equals -1.0111101b * 2^-17. The
// two leading zeros of the significand lower the float's biased exponent to
// 110, and the remaining bits are left-aligned in the significand field.
TEST(HalfToFloat, ConvertsNegativeRandomDenormal)
{
    const auto expected = get_bits("1" "01101110" "01111010000000000000000");

    float widened = create_from_bits<half>("1" "00000" "0010111101");

    ASSERT_EQ(get_bits(widened), expected);
}


// The smallest positive half subnormal is 1 * 2^-24. As a float this is the
// exact power of two with biased exponent 103 and an all-zero significand.
TEST(HalfToFloat, ConvertsSmallestPositiveDenormal)
{
    const auto expected = get_bits("0" "01100111" "00000000000000000000000");

    float widened = create_from_bits<half>("0" "00000" "0000000001");

    ASSERT_EQ(get_bits(widened), expected);
}


// The most negative half subnormal is -(1111111111b * 2^-24), which equals
// -1.111111111b * 2^-15. As a float this is biased exponent 112 with nine
// one-bits at the top of the significand field.
TEST(HalfToFloat, ConvertsSmallestNegativeDenormal)
{
    const auto expected = get_bits("1" "01110000" "11111111100000000000000");

    float widened = create_from_bits<half>("1" "00000" "1111111111");

    ASSERT_EQ(get_bits(widened), expected);
}


// The largest positive half subnormal is 1111111111b * 2^-24, which equals
// 1.111111111b * 2^-15. As a float this is biased exponent 112 with nine
// one-bits at the top of the significand field.
TEST(HalfToFloat, ConvertsLargestPositiveDenormal)
{
    const auto expected = get_bits("0" "01110000" "11111111100000000000000");

    float widened = create_from_bits<half>("0" "00000" "1111111111");

    ASSERT_EQ(get_bits(widened), expected);
}


// The negative half subnormal closest to zero is -(1 * 2^-24). As a float
// this is the exact power of two with biased exponent 103, an all-zero
// significand, and the sign bit set.
TEST(HalfToFloat, ConvertsLargestNegativeDenormal)
{
    const auto expected = get_bits("1" "01100111" "00000000000000000000000");

    float widened = create_from_bits<half>("1" "00000" "0000000001");

    ASSERT_EQ(get_bits(widened), expected);
}


// clang-format on
76 changes: 72 additions & 4 deletions include/ginkgo/core/base/half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,42 @@ class alignas(std::uint16_t) half {
if (f16_traits::is_inf(exp)) {
return conv::shift_sign(data_) | exp;
} else if (f16_traits::is_denom(exp)) {
// TODO: handle denormals
return conv::shift_sign(data_);
// gap to fp16 denormal exponents (+1 from normal to denormal
// exponent base)
const auto gap_to_fp16 =
(conv::bias_change - ((data_ & f32_traits::exponent_mask) >>
conv::significand_offset) >>
f16_traits::significand_bits) +
Comment on lines +421 to +423
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(conv::bias_change - ((data_ & f32_traits::exponent_mask) >>
conv::significand_offset) >>
f16_traits::significand_bits) +
((conv::bias_change - ((data_ & f32_traits::exponent_mask) >>
conv::significand_offset)) >>
f16_traits::significand_bits) +

This makes the evaluation order explicit and avoids a compiler warning.

1;

// get the tail length which will be rounding
const auto tail_len = gap_to_fp16 + conv::significand_offset;

if (tail_len > f32_traits::significand_bits + 1) {
return conv::shift_sign(data_);
}

// all significant (including implicitly leading 1) will be
// moved after representation field more than one digit (less
// than half) such that it will rounding to zero.
const auto explicit_significand =
(data_ & f32_traits::significand_mask) |
(1 << f32_traits::significand_bits);

const auto tail =
explicit_significand &
static_cast<f32_traits::bits_type>((1 << tail_len) - 1);

auto new_significand = explicit_significand >> tail_len;

const auto result =
conv::shift_sign(data_) | exp | new_significand;

const auto half =
static_cast<f32_traits::bits_type>(1 << (tail_len - 1));

return result +
(tail > half || ((tail == half) && (result & 1)));
} else {
// Rounding to even
const auto result = conv::shift_sign(data_) | exp |
Expand All @@ -442,8 +476,42 @@ class alignas(std::uint16_t) half {
return conv::shift_sign(data_) | f32_traits::exponent_mask |
f32_traits::significand_mask;
} else if (f16_traits::is_denom(data_)) {
// TODO: handle denormals
return conv::shift_sign(data_);
if (!(data_ & f16_traits::significand_mask)) {
return conv::shift_sign(data_);
}

int leading_zeros{};

// Counts leading zeros in the significand to determine the
// normalization shift
#if defined(_MSC_VER)
unsigned long index{};
_BitScanReverse(&index, static_cast<std::uint32_t>(
f16_traits::significand_mask & data_));

leading_zeros = f16_traits::significand_bits - index - 1;
#else
leading_zeros =
__builtin_clz(static_cast<std::uint32_t>(
f16_traits::significand_mask & data_)) -
f16_traits::exponent_bits - f16_traits::sign_bits -
8 * (sizeof(conv::result_bits) - sizeof(conv::source_bits));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
8 * (sizeof(conv::result_bits) - sizeof(conv::source_bits));
CHAR_BIT * (sizeof(conv::result_bits) - sizeof(conv::source_bits));

`CHAR_BIT` is a predefined macro from `<climits>`.

#endif

// Computes the new exponent, 0xxxxxxxx000...00
auto new_exponent =
((conv::bias_change >> f32_traits::significand_bits) -
leading_zeros)
<< f32_traits::significand_bits;
Comment on lines +502 to +505
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto new_exponent =
((conv::bias_change >> f32_traits::significand_bits) -
leading_zeros)
<< f32_traits::significand_bits;
auto new_exponent =
conv::bias_change - (leading_zeros << f32_traits::significand_bits);

It is also worth noting that leading_zeros is always less than the bias change, because we are converting half -> float.


// Shifts the original significand to normalize it, remove the
// implicit '1', and align it in the new 23-bit field
auto new_significand =
(static_cast<f32_traits::bits_type>(data_)
<< (conv::significand_offset + leading_zeros + 1)) &
f32_traits::significand_mask;

return conv::shift_sign(data_) | new_exponent | new_significand;
} else {
return conv::shift_sign(data_) | conv::shift_exponent(data_) |
conv::shift_significand(data_);
Expand Down
Loading