@@ -36,15 +36,13 @@ const LOG2_E = 1.442695040888963407359924681001892137426646
 
 # log(2) into upper and lower bits
 LN2U(::Type{Float32}) = 6.9313812256f-1
-
 LN2L(::Type{Float32}) = 9.0580006145f-6
 
-
-# -log(2)/1024 into upper and lower bits
-LN2o1024U(::Type{Float64}) = -6.769015435155716e-4
-LN2o1024L(::Type{Float64}) = -2.264694154146777e-20
-# 1024/log(2)
-LN2o1024INV(::Type{Float64}) = 1477.3197218702985
+# -log(2)/256 into upper and lower bits
+LN2o256U(::Type{Float64}) = -2.7076061740622863e-3
+LN2o256L(::Type{Float64}) = -9.058776616587108e-20
+# 256/log(2)
+LN2o256INV(::Type{Float64}) = 369.3299304675746
 
 # magic rounding constant: 1.5*2^52. Adding, then subtracting it from a float rounds it to the nearest integer.
 MAGIC_ROUND_CONST(::Type{Float64}) = 6755399441055744.0
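
As an aside (not part of the patch), the two pieces above can be checked directly: the hi/lo pair is the usual split of -log(2)/256 into a leading Float64 plus the remainder that Float64 cannot hold, and the magic constant trick amounts to reading the rounded integer out of the low bits. A minimal sketch, assuming the constants were derived via BigFloat:

# Hypothetical sketch (not part of the patch): deriving the hi/lo split of -log(2)/256.
c  = -log(big(2)) / 256
hi = Float64(c)        # ≈ -2.7076061740622863e-3, the LN2o256U value above
lo = Float64(c - hi)   # ≈ -9.058776616587108e-20, the LN2o256L value above

# The magic rounding constant: adding 1.5*2^52 forces rounding to the nearest integer,
# which can then be read straight out of the low bits of the result.
MAGIC = 6755399441055744.0
shifted = 123.7 + MAGIC
@assert shifted - MAGIC == 124.0                     # subtracting recovers the rounded value
@assert reinterpret(Int64, shifted) % Int32 == 124   # the integer sits in the low mantissa bits
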
@@ -58,11 +56,16 @@ MIN_EXP(::Type{Float64}) = -745.1332191019412076235 # log 2^-1075
 MIN_EXP(::Type{Float32}) = -103.97207708f0 # log 2^-150
 
 
-@inline exp_kernel(x) = evalpoly(x, (0.9999999999999999, 1.0, 0.5000000047728719, 0.16666666809852823))
+@inline exp_kernel(x) = evalpoly(x, (1.0,
+                                     0.9999999999999912,
+                                     0.4999999999999962,
+                                     0.16666668575815502,
+                                     0.041666671121347275))
+
 @inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3)
 
 # Note that this would need to be included literally to actually run.
-const J_TABLE = Float64[2.0^(-53 + big(j-1)/1024) for j in 1:1024]
+const J_TABLE = Float64[2.0^(-53 + big(j-1)/256) for j in 1:256]
 
 # for values smaller than this threshold just use a Taylor expansion
 @eval exp_small_thres(::Type{Float32}) = $(2.0f0^-13)
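
A quick sanity check (not part of the patch) on the new degree-4 kernel: over the reduced range |r| <= log(2)/512 that the rewritten reduction produces, its relative error against exp should land around Float64 machine epsilon. A minimal sketch, assuming Julia 1.4+ for evalpoly:

# Hypothetical accuracy check for the new Float64 kernel (not part of the patch).
exp_kernel(x) = evalpoly(x, (1.0, 0.9999999999999912, 0.4999999999999962,
                             0.16666668575815502, 0.041666671121347275))

rs = range(-log(2)/512, stop=log(2)/512, length=10_001)
maxrelerr = maximum(abs(exp_kernel(r) - exp(r)) / exp(r) for r in rs)
# Expect maxrelerr on the order of 1e-16, i.e. roughly one ulp.
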
@@ -146,34 +149,33 @@ function exp(x::T) where T<:Float32
 end
 
 # Method
-# 1. Argument reduction: Reduce x to an r so that |r| <= ln(2)/1024. Given x,
+# 1. Argument reduction: Reduce x to an r so that |r| <= ln(2)/512. Given x,
 #    find r and integers k, j such that
-#       x = k*ln(2) + j/1024 + r,  0 <= j < 1024,  |r| <= ln(2)/1024.
+#       x = (k + j/256)*ln(2) + r,  0 <= j < 256,  |r| <= ln(2)/512.
 #
-# 2. Approximate exp(r) by it's degree 3 taylor series around 0.
+# 2. Approximate exp(r) by its degree 4 Taylor series around 0.
 #    Since the bounds on r are very tight, this is sufficient to be accurate to floating point epsilon.
 #
-# 3. Scale back: exp(x) = 2^k * 2^(j/1024) * exp(r)
-#    Since the range of possible j is small, 2^(j/1024) is simply stored for all possible values.
+# 3. Scale back: exp(x) = 2^k * 2^(j/256) * exp(r)
+#    Since the range of possible j is small, 2^(j/256) is simply stored for all possible values.
 #    Technically the J_TABLE stores a scaled version which makes subnormal numbers work better.
 
 
-
 function myexp(x::T) where T<:Float64
     if !(abs(x) < abs(MAX_EXP(T)))
         x <= MIN_EXP(T) && return 0.0
         x >= MAX_EXP(T) && return Inf
         isnan(x) && return x
     end
 
-    N_float = muladd(x, LN2o1024INV(T), MAGIC_ROUND_CONST(T))
+    N_float = muladd(x, LN2o256INV(T), MAGIC_ROUND_CONST(T))
     N = reinterpret(Int64, N_float) % Int32
 
     N_float -= MAGIC_ROUND_CONST(T)
-    r = muladd(N_float, LN2o1024L(T), muladd(N_float, LN2o1024U(T), x))
-    k = N >> 10
+    r = muladd(N_float, LN2o256L(T), muladd(N_float, LN2o256U(T), x))
+    k = N >> 8
 
-    small_part = @inbounds J_TABLE[N&1023 + 1] * exp_kernel(r)
+    small_part = @inbounds J_TABLE[N&255 + 1] * exp_kernel(r)
 
     twopk = rem(53 + k, UInt64) << 52
     return reinterpret(T, twopk + reinterpret(Int, small_part))
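
A hypothetical smoke test for the rewritten path (not part of the patch), assuming the constants, J_TABLE, exp_kernel, and the MAX_EXP/MIN_EXP definitions above have been included literally, as the note above the table warns. The values 710.0 and -746.0 are chosen just beyond the overflow and underflow thresholds:

# Hypothetical smoke test: compare myexp against Base's exp over a spread of inputs.
for x in (-700.0, -2.5, -1.0, 0.0, 1e-3, 0.5, 1.0, 88.0, 700.0)
    @assert isapprox(myexp(x), exp(x); rtol=1e-14)
end
@assert myexp(710.0) == Inf    # overflow saturates to Inf
@assert myexp(-746.0) == 0.0   # underflow flushes to zero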