From 793053cfe1a9eb60a03eb56b31e5901e14302804 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Thu, 4 May 2023 12:27:46 -0700 Subject: [PATCH] Improve floating-point Euclidean division for Float16 and Float32 Fixes #49450. --- base/div.jl | 6 ++++++ test/numbers.jl | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/base/div.jl b/base/div.jl index 9c2187e662ee9..a7ecd641b925f 100644 --- a/base/div.jl +++ b/base/div.jl @@ -368,3 +368,9 @@ end # NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division, # so it is used here as the basis of float div(). div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y)) + +# Vincent Lefèvre: "The Euclidean Division Implemented with a Floating-Point Division and a Floor" +# https://inria.hal.science/inria-00070403 +# Theorem 1 implies that the following are exact if eps(x/y) <= 1 +div(x::Float32, y::Float32, r::RoundingMode) = Float32(round(Float64(x) / Float64(y), r)) +div(x::Float16, y::Float16, r::RoundingMode) = Float16(round(Float32(x) / Float32(y), r)) diff --git a/test/numbers.jl b/test/numbers.jl index efb2702aff1c2..76098e8a29d33 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -1702,6 +1702,27 @@ end @test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0 @test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0 end + @testset "issue #49450" begin + @test div(514, Float16(0.75)) === Float16(685) + @test fld(514, Float16(0.75)) === Float16(685) + @test cld(515, Float16(0.75)) === Float16(687) + + @test cld(1, Float16(0.000999)) === Float16(1001) + @test cld(2, Float16(0.001999)) === Float16(1001) + @test cld(3, Float16(0.002934)) === Float16(1023) + @test cld(4, Float16(0.003998)) === Float16(1001) + @test fld(5, Float16(0.004925)) === Float16(1015) + + @test div(4_194_307, Float32(0.75)) === Float32(5_592_409) + @test fld(4_194_307, Float32(0.75)) === Float32(5_592_409) + @test cld(4_194_308, Float32(0.75)) === Float32(5_592_411) + + @test fld(5, Float32(6.556511e-7)) === Float32(7_626_007) + @test fld(10, Float32(1.3113022e-6)) === Float32(7_626_007) + @test fld(11, Float32(1.4305115e-6)) === Float32(7_689_557) + @test cld(16, Float32(2.8014183e-6)) === Float32(5_711_393) + @test cld(17, Float32(2.2053719e-6)) === Float32(7_708_451) + end end @testset "return types" begin for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)