Skip to content

Commit 71064e7

Browse files
committed
Improve floating-point Euclidean division for Float16 and Float32
Fixes #49450.
1 parent 93ce36c commit 71064e7

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

base/div.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,3 +368,9 @@ end
368368
# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
369369
# so it is used here as the basis of float div().
370370
# Generic floating-point `div` for two operands of the same type `T`.
# The remainder `rem(x, y, r)` is removed from `x` before dividing by `y`;
# the quotient is then rounded and converted back to `T`.
function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
    remainder = rem(x, y, r)
    quotient = (x - remainder) / y
    return convert(T, round(quotient))
end
371+
372+
# Vincent Lefèvre: "The Euclidean Division Implemented with a Floating-Point Division and a Floor"
373+
# https://inria.hal.science/inria-00070403
374+
# Theorem 1 implies that the following are exact if eps(x/y) <= 1
375+
# Float32 specialization: both operands are widened to Float64, the quotient
# is rounded to an integer value under rounding mode `r`, and the result is
# narrowed back to Float32.
function div(x::Float32, y::Float32, r::RoundingMode)
    wide_quotient = Float64(x) / Float64(y)
    return Float32(round(wide_quotient, r))
end
376+
# Float16 specialization: both operands are widened to Float32, the quotient
# is rounded to an integer value under rounding mode `r`, and the result is
# narrowed back to Float16.
function div(x::Float16, y::Float16, r::RoundingMode)
    wide_quotient = Float32(x) / Float32(y)
    return Float16(round(wide_quotient, r))
end

test/numbers.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,6 +1702,27 @@ end
17021702
@test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0
17031703
@test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0
17041704
end
1705+
# Regression tests for issue #49450: div/fld/cld called with an integer
# dividend and a Float16 or Float32 divisor. Each expectation uses `===`,
# so the result must match the expected value in both type and value.
@testset "issue #49450" begin
1706+
# Float16 divisor 0.75.
@test div(514, Float16(0.75)) === Float16(685)
1707+
@test fld(514, Float16(0.75)) === Float16(685)
1708+
@test cld(515, Float16(0.75)) === Float16(687)
1709+
1710+
# Float16 divisors on the order of 1e-3.
@test cld(1, Float16(0.000999)) === Float16(1001)
1711+
@test cld(2, Float16(0.001999)) === Float16(1001)
1712+
@test cld(3, Float16(0.002934)) === Float16(1023)
1713+
@test cld(4, Float16(0.003998)) === Float16(1001)
1714+
@test fld(5, Float16(0.004925)) === Float16(1015)
1715+
1716+
# Float32 divisor 0.75 with dividends just above 2^22 (= 4_194_304).
@test div(4_194_307, Float32(0.75)) === Float32(5_592_409)
1717+
@test fld(4_194_307, Float32(0.75)) === Float32(5_592_409)
1718+
@test cld(4_194_308, Float32(0.75)) === Float32(5_592_411)
1719+
1720+
# Float32 divisors on the order of 1e-6.
@test fld(5, Float32(6.556511e-7)) === Float32(7_626_007)
1721+
@test fld(10, Float32(1.3113022e-6)) === Float32(7_626_007)
1722+
@test fld(11, Float32(1.4305115e-6)) === Float32(7_689_557)
1723+
@test cld(16, Float32(2.8014183e-6)) === Float32(5_711_393)
1724+
@test cld(17, Float32(2.2053719e-6)) === Float32(7_708_451)
1725+
end
17051726
end
17061727
@testset "return types" begin
17071728
for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)

0 commit comments

Comments
 (0)