Skip to content

Commit 25a91e4

Browse files
committed
equally fast version with a smaller table.
Uses a quartic which allows a smaller table. Performance is equal to slightly better, and accuracy is similar.
1 parent 89ea7c4 commit 25a91e4

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

base/special/exp.jl

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,13 @@ const LOG2_E = 1.442695040888963407359924681001892137426646
3636

3737
# log(2) into upper and lower bits
3838
LN2U(::Type{Float32}) = 6.9313812256f-1
39-
4039
LN2L(::Type{Float32}) = 9.0580006145f-6
4140

42-
43-
# -log(2)/1024 into upper and lower bits
44-
LN2o1024U(::Type{Float64}) = -6.769015435155716e-4
45-
LN2o1024L(::Type{Float64}) = -2.264694154146777e-20
46-
# 1024/log(2)
47-
LN2o1024INV(::Type{Float64}) = 1477.3197218702985
41+
# -log(2)/256 into upper and lower bits
42+
LN2o256U(::Type{Float64}) = -2.7076061740622863e-3
43+
LN2o256L(::Type{Float64}) = -9.058776616587108e-20
44+
# 256/log(2)
45+
LN2o256INV(::Type{Float64}) = 369.3299304675746
4846

4947
# magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int.
5048
MAGIC_ROUND_CONST(::Type{Float64}) = 6755399441055744.0
@@ -58,11 +56,16 @@ MIN_EXP(::Type{Float64}) = -745.1332191019412076235 # log 2^-1075
5856
MIN_EXP(::Type{Float32}) = -103.97207708f0 # log 2^-150
5957

6058

61-
@inline exp_kernel(x) = evalpoly(x, (0.9999999999999999, 1.0, 0.5000000047728719, 0.16666666809852823))
59+
@inline exp_kernel(x) = evalpoly(x, (1.0,
60+
0.9999999999999912,
61+
0.4999999999999962,
62+
0.16666668575815502,
63+
0.041666671121347275))
64+
6265
@inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3)
6366

6467
#Note that this would need to be included literally to actually run.
65-
const J_TABLE = Float64[2.0^(-53 + big(j-1)/1024) for j in 1:1024]
68+
const J_TABLE = Float64[2.0^(-53 + big(j-1)/256) for j in 1:256]
6669

6770
# for values smaller than this threshold just use a Taylor expansion
6871
@eval exp_small_thres(::Type{Float32}) = $(2.0f0^-13)
@@ -146,34 +149,33 @@ function exp(x::T) where T<:Float32
146149
end
147150

148151
# Method
149-
# 1. Argument reduction: Reduce x to an r so that |r| <= ln(2)/1024. Given x,
152+
# 1. Argument reduction: Reduce x to an r so that |r| <= ln(2)/512. Given x,
150153
# find r and integers k, j such that
151-
# x = k*ln(2) + j/1024 + r, 0 <= j < 1024, |r| <= ln(2)/1024.
154+
# x = k*ln(2) + j/256 + r, 0 <= j < 256, |r| <= ln(2)/512.
152155
#
153156
# 2. Approximate exp(r) by it's degree 3 taylor series around 0.
154157
# Since the bounds on r are very tight, this is sufficient to be accurate to floating point epsilon.
155158
#
156-
# 3. Scale back: exp(x) = 2^k * 2^(j/1024) * exp(r)
157-
# Since the range of possible j is small, 2^(j/1024) is simply stored for all possible values.
159+
# 3. Scale back: exp(x) = 2^k * 2^(j/256) * exp(r)
160+
# Since the range of possible j is small, 2^(j/256) is simply stored for all possible values.
158161
# Technichally the J_table stores a scaled version which makes subnormal numbers work better.
159162

160163

161-
162164
function myexp(x::T) where T<:Float64
163165
if !(abs(x) < abs(MAX_EXP(T)))
164166
x <= -MIN_EXP(T) && return 0.
165167
x >= MAX_EXP(T) && return Inf
166168
isnan(x) && return x
167169
end
168170

169-
N_float = muladd(x, LN2o1024INV(T), MAGIC_ROUND_CONST(T))
171+
N_float = muladd(x, LN2o256INV(T), MAGIC_ROUND_CONST(T))
170172
N = reinterpret(Int64, N_float) % Int32
171173

172174
N_float -= MAGIC_ROUND_CONST(T)
173-
r = muladd(N_float, LN2o1024L(T), muladd(N_float, LN2o1024U(T), x))
174-
k = N >> 10
175+
r = muladd(N_float, LN2o256L(T), muladd(N_float, LN2o256U(T), x))
176+
k = N >> 8
175177

176-
small_part = @inbounds J_TABLE[N&1023 + 1] * exp_kernel(r)
178+
small_part = @inbounds J_TABLE[N&255 + 1] * exp2_kernel(r)
177179

178180
twopk = rem(53 + k, UInt64) << 52
179181
return reinterpret(T, twopk+reinterpret(Int, small_part))

0 commit comments

Comments
 (0)