1
- # Based on FDLIBM http://www.netlib.org/fdlibm/e_exp.c
2
- # which is made available under the following licence
3
-
4
- # # Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. Permission
5
- # # to use, copy, modify, and distribute this software is freely granted,
6
- # # provided that this notice is preserved.
7
-
8
- # Method
9
- # 1. Argument reduction: Reduce x to an r so that |r| <= 0.5*ln(2). Given x,
10
- # find r and integer k such that
11
- # x = k*ln(2) + r, |r| <= 0.5*ln(2).
12
- # Here r is represented as r = hi - lo for better accuracy.
13
- #
14
- # 2. Approximate exp(r) by a special rational function on [0, 0.5*ln(2)]:
15
- # R(r^2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r^4/360 + ...
16
- #
17
- # A special Remez algorithm on [0, 0.5*ln(2)] is used to generate a
18
- # polynomial to approximate R.
19
- #
20
- # The computation of exp(r) thus becomes
21
- # 2*r
22
- # exp(r) = 1 + ----------
23
- # R(r) - r
24
- # r*c(r)
25
- # = 1 + r + ----------- (for better accuracy)
26
- # 2 - c(r)
27
- # where
28
- # c(r) = r - (P1*r^2 + P2*r^4 + ... + P5*r^10 + ...).
29
- #
30
- # 3. Scale back: exp(x) = 2^k * exp(r)
1
+ # magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int.
2
+ # This works because eps(MAGIC_ROUND_CONST(T)) == one(T), so adding it to a smaller number aligns the lsb to the 1s place.
3
+ # Values for which this trick doesn't work are going to have outputs of 0 or Inf.
4
+ MAGIC_ROUND_CONST (:: Type{Float64} ) = 6.755399441055744e15
5
+ MAGIC_ROUND_CONST (:: Type{Float32} ) = 1.048576f7
6
+
7
+ # max, min, and subnormal arguments
8
+ # max_exp = T(exponent_bias(T)*log(base, big(2)) + log(base, 2 - big(2.0)^-significand_bits(T)))
9
+ MAX_EXP (n:: Val{2} , :: Type{Float32} ) = 128.0f0
10
+ MAX_EXP (n:: Val{2} , :: Type{Float64} ) = 1024.0
11
+ MAX_EXP (n:: Val{:ℯ} , :: Type{Float32} ) = 88.72284f0
12
+ MAX_EXP (n:: Val{:ℯ} , :: Type{Float64} ) = 709.782712893384
13
+ MAX_EXP (n:: Val{10} , :: Type{Float32} ) = 38.53184f0
14
+ MAX_EXP (n:: Val{10} , :: Type{Float64} ) = 308.25471555991675
15
+
16
+ # min_exp = T(-(exponent_bias(T)+significand_bits(T)) * log(base, big(2)))
17
+ MIN_EXP (n:: Val{2} , :: Type{Float32} ) = - 150.0f0
18
+ MIN_EXP (n:: Val{2} , :: Type{Float64} ) = - 1075.0
19
+ MIN_EXP (n:: Val{:ℯ} , :: Type{Float32} ) = - 103.97208f0
20
+ MIN_EXP (n:: Val{:ℯ} , :: Type{Float64} ) = - 745.1332191019412
21
+ MIN_EXP (n:: Val{10} , :: Type{Float32} ) = - 45.1545f0
22
+ MIN_EXP (n:: Val{10} , :: Type{Float64} ) = - 323.60724533877976
23
+
24
+ # subnorm_exp = abs(log(base, floatmin(T)))
25
+ # these vals are positive since it's easier to take abs(x) than -abs(x)
26
+ SUBNORM_EXP (n:: Val{2} , :: Type{Float32} ) = 126.00001f0
27
+ SUBNORM_EXP (n:: Val{2} , :: Type{Float64} ) = 1022.0
28
+ SUBNORM_EXP (n:: Val{:ℯ} , :: Type{Float32} ) = 87.33655f0
29
+ SUBNORM_EXP (n:: Val{:ℯ} , :: Type{Float64} ) = 708.3964185322641
30
+ SUBNORM_EXP (n:: Val{10} , :: Type{Float32} ) = 37.92978f0
31
+ SUBNORM_EXP (n:: Val{10} , :: Type{Float64} ) = 307.6526555685887
32
+
33
+ # 256/log(base, 2) (For Float64 reductions)
34
+ LogBo256INV (:: Val{2} , :: Type{Float64} ) = 256.0
35
+ LogBo256INV (:: Val{:ℯ} , :: Type{Float64} ) = 369.3299304675746
36
+ LogBo256INV (:: Val{10} , :: Type{Float64} ) = 850.4135922911647
37
+
38
+ # -log(base, 2)/256 in upper and lower bits
39
+ # Upper is truncated to only have 34 bits of significand since N has at most
40
+ # ceil(log2(-MIN_EXP(base, Float64)*LogBo256INV(Val(2), Float64))) = 19 bits.
41
+ # This ensures no rounding when multiplying LogBo256U*N for FMAless hardware
42
+ LogBo256U (:: Val{2} , :: Type{Float64} ) = - 0.00390625
43
+ LogBo256U (:: Val{:ℯ} , :: Type{Float64} ) = - 0.002707606173999011
44
+ LogBo256U (:: Val{10} , :: Type{Float64} ) = - 0.0011758984204561784
45
+ LogBo256L (:: Val{2} , :: Type{Float64} ) = 0.0
46
+ LogBo256L (:: Val{:ℯ} , :: Type{Float64} ) = - 6.327543041662719e-14
47
+ LogBo256L (:: Val{10} , :: Type{Float64} ) = - 1.0624811566412999e-13
48
+
49
+ # 1/log(base, 2) (For Float32 reductions)
50
+ LogBINV (:: Val{2} , :: Type{Float32} ) = 1.0f0
51
+ LogBINV (:: Val{:ℯ} , :: Type{Float32} ) = 1.442695f0
52
+ LogBINV (:: Val{10} , :: Type{Float32} ) = 3.321928f0
53
+
54
+ # -log(base, 2) in upper and lower bits
55
+ # Upper is truncated to only have 16 bits of significand since N has at most
56
+ # ceil(log2(-MIN_EXP(n, Float32)*LogBINV(Val(2), Float32))) = 8 bits.
57
+ # This ensures no rounding when multiplying LogBU*N for FMAless hardware
58
+ LogBU (:: Val{2} , :: Type{Float32} ) = - 1.0f0
59
+ LogBU (:: Val{:ℯ} , :: Type{Float32} ) = - 0.69314575f0
60
+ LogBU (:: Val{10} , :: Type{Float32} ) = - 0.3010254f0
61
+ LogBL (:: Val{2} , :: Type{Float32} ) = 0.0f0
62
+ LogBL (:: Val{:ℯ} , :: Type{Float32} ) = - 1.4286068f-6
63
+ LogBL (:: Val{10} , :: Type{Float32} ) = - 4.605039f-6
64
+
65
+ # Range reduced kernels
66
+ @inline function expm1b_kernel (:: Val{2} , x:: Float64 )
67
+ return x * evalpoly (x, (0.6931471805599393 , 0.24022650695910058 ,
68
+ 0.05550411502333161 , 0.009618129548366803 ))
69
+ end
70
+ @inline function expm1b_kernel (:: Val{:ℯ} , x:: Float64 )
71
+ return x * evalpoly (x, (0.9999999999999912 , 0.4999999999999997 ,
72
+ 0.1666666857598779 , 0.04166666857598777 ))
73
+ end
74
+
75
+ @inline function expm1b_kernel (:: Val{10} , x:: Float64 )
76
+ return x * evalpoly (x, (2.3025850929940255 , 2.6509490552391974 ,
77
+ 2.034678825384765 , 1.1712552025835192 ))
78
+ end
79
+
80
+ @inline function expb_kernel (:: Val{2} , x:: Float32 )
81
+ return evalpoly (x, (1.0f0 , 0.6931472f0 , 0.2402265f0 ,
82
+ 0.05550411f0 , 0.009618025f0 ,
83
+ 0.0013333423f0 , 0.00015469732f0 , 1.5316464f-5 ))
84
+ end
85
+ @inline function expb_kernel (:: Val{:ℯ} , x:: Float32 )
86
+ return evalpoly (x, (1.0f0 , 1.0f0 , 0.5f0 , 0.16666667f0 ,
87
+ 0.041666217f0 , 0.008333249f0 ,
88
+ 0.001394858f0 , 0.00019924171f0 ))
89
+ end
90
+ @inline function expb_kernel (:: Val{10} , x:: Float32 )
91
+ return evalpoly (x, (1.0f0 , 2.3025851f0 , 2.650949f0 ,
92
+ 2.0346787f0 , 1.1712426f0 , 0.53937745f0 ,
93
+ 0.20788547f0 , 0.06837386f0 ))
94
+ end
31
95
32
- # log(2)
33
- const LN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01
34
- # log2(e)
35
- const LOG2_E = 1.442695040888963407359924681001892137426646
96
+ # Table stores data with 60 sig figs by using the fact that the first 12 bits of all the
97
+ # values would be the same if stored as regular Float64.
98
+ # This only gains 8 bits since the least significant 4 bits of the exponent
99
+ # of the small part are not the same for all ta
100
+ const JU_MASK = typemax (UInt64)>> 12
101
+ const JL_MASK = typemax (UInt64)>> 8
102
+ const JU_CONST = 0x3FF0000000000000
103
+ const JL_CONST = 0x3C00000000000000
36
104
37
- # log(2) into upper and lower bits
38
- LN2U (:: Type{Float64} ) = 6.93147180369123816490e-1
39
- LN2U (:: Type{Float32} ) = 6.9313812256f-1
40
105
41
- LN2L (:: Type{Float64} ) = 1.90821492927058770002e-10
42
- LN2L (:: Type{Float32} ) = 9.0580006145f-6
106
+ # function make_table(size)
107
+ # t_array = zeros(UInt64, size);
108
+ # for j in 1:size
109
+ # val = 2.0^(BigFloat(j-1)/size)
110
+ # valU = Float64(val, RoundDown)
111
+ # valL = Float64(val-valU)
112
+ # valU = reinterpret(UInt64, valU) & JU_MASK
113
+ # valL = ((reinterpret(UInt64, valL) & JL_MASK)>>44)<<52
114
+ # t_array[j] = valU | valL
115
+ # end
116
+ # return Tuple(t_array)
117
+ # end
118
+ # const J_TABLE = make_table(256);
119
+ const J_TABLE = (0x0000000000000000, 0xaac00b1afa5abcbe, 0x9b60163da9fb3335, 0xab502168143b0280, 0xadc02c9a3e778060, 0x656037d42e11bbcc, 0xa7a04315e86e7f84, 0x84c04e5f72f654b1, 0x8d7059b0d3158574, 0xa510650a0e3c1f88, 0xa8d0706b29ddf6dd, 0x83207bd42b72a836, 0x6180874518759bc8, 0xa4b092bdf66607df, 0x91409e3ecac6f383, 0x85d0a9c79b1f3919, 0x98a0b5586cf9890f, 0x94f0c0f145e46c85, 0x9010cc922b7247f7, 0xa210d83b23395deb, 0x4030e3ec32d3d1a2, 0xa5b0efa55fdfa9c4, 0xae40fb66affed31a, 0x8d41073028d7233e, 0xa4911301d0125b50, 0xa1a11edbab5e2ab5, 0xaf712abdc06c31cb, 0xae8136a814f204aa, 0xa661429aaea92ddf, 0xa9114e95934f312d, 0x82415a98c8a58e51, 0x58f166a45471c3c2, 0xab9172b83c7d517a, 0x70917ed48695bbc0, 0xa7718af9388c8de9, 0x94a1972658375d2f, 0x8e51a35beb6fcb75, 0x97b1af99f8138a1c, 0xa351bbe084045cd3, 0x9001c82f95281c6b, 0x9e01d4873168b9aa, 0xa481e0e75eb44026, 0xa711ed5022fcd91c, 0xa201f9c18438ce4c, 0x8dc2063b88628cd6, 0x935212be3578a819, 0x82a21f49917ddc96, 0x8d322bdda27912d1, 0x99b2387a6e756238, 0x8ac2451ffb82140a, 0x8ac251ce4fb2a63f, 0x93e25e85711ece75, 0x82b26b4565e27cdd, 0x9e02780e341ddf29, 0xa2d284dfe1f56380, 0xab4291ba7591bb6f, 0x86129e9df51fdee1, 0xa352ab8a66d10f12, 0xafb2b87fd0dad98f, 0xa572c57e39771b2e, 0x9002d285a6e4030b, 0x9d12df961f641589, 0x71c2ecafa93e2f56, 0xaea2f9d24abd886a, 0x86f306fe0a31b715, 0x89531432edeeb2fd, 0x8a932170fc4cd831, 0xa1d32eb83ba8ea31, 0x93233c08b26416ff, 0xab23496266e3fa2c, 0xa92356c55f929ff0, 0xa8f36431a2de883a, 0xa4e371a7373aa9ca, 0xa3037f26231e7549, 0xa0b38cae6d05d865, 0xa3239a401b7140ee, 0xad43a7db34e59ff6, 0x9543b57fbfec6cf4, 0xa083c32dc313a8e4, 0x7fe3d0e544ede173, 0x8ad3dea64c123422, 0xa943ec70df1c5174, 0xa413fa4504ac801b, 0x8bd40822c367a024, 0xaf04160a21f72e29, 0xa3d423fb27094689, 0xab8431f5d950a896, 0x88843ffa3f84b9d4, 0x48944e086061892d, 0xae745c2042a7d231, 0x9c946a41ed1d0057, 0xa1e4786d668b3236, 0x73c486a2b5c13cd0, 0xab1494e1e192aed1, 0x99c4a32af0d7d3de, 0xabb4b17dea6db7d6, 0x7d44bfdad5362a27, 0x9054ce41b817c114, 0x98e4dcb299fddd0d, 0xa564eb2d81d8abfe, 0xa5a4f9b2769d2ca6, 0x7a2508417f4531ee, 0xa82516daa2cf6641, 0xac65257de83f4eee, 0xabe5342b569d4f81, 0x879542e2f4f6ad27, 0xa8a551a4ca5d920e, 0xa7856070dde910d1, 0x99b56f4736b527da, 0xa7a57e27dbe2c4ce, 0x82958d12d497c7fd, 0xa4059c0827ff07cb, 0x9635ab07dd485429, 0xa245ba11fba87a02, 0x3c45c9268a5946b7, 0xa195d84590998b92, 0x9ba5e76f15ad2148, 0xa985f6a320dceb70, 0xa60605e1b976dc08, 0x9e46152ae6cdf6f4, 0xa636247eb03a5584, 0x984633dd1d1929fd, 0xa8e6434634ccc31f, 0xa28652b9febc8fb6, 0xa226623882552224, 0xa85671c1c70833f5, 0x60368155d44ca973, 0x880690f4b19e9538, 0xa216a09e667f3bcc, 0x7a36b052fa75173e, 0xada6c012750bdabe, 0x9c76cfdcddd47645, 0xae46dfb23c651a2e, 0xa7a6ef9298593ae4, 0xa9f6ff7df9519483, 0x59d70f7466f42e87, 0xaba71f75e8ec5f73, 0xa6f72f8286ead089, 0xa7a73f9a48a58173, 0x90474fbd35d7cbfd, 0xa7e75feb564267c8, 0x9b777024b1ab6e09, 0x986780694fde5d3f, 0x934790b938ac1cf6, 0xaaf7a11473eb0186, 0xa207b17b0976cfda, 0x9f17c1ed0130c132, 0x91b7d26a62ff86f0, 0x7057e2f336cf4e62, 0xabe7f3878491c490, 0xa6c80427543e1a11, 0x946814d2add106d9, 0xa1582589994cce12, 0x9998364c1eb941f7, 0xa9c8471a4623c7ac, 0xaf2857f4179f5b20, 0xa01868d99b4492ec, 0x85d879cad931a436, 0x99988ac7d98a6699, 0x9d589bd0a478580f, 0x96e8ace5422aa0db, 0x9ec8be05bad61778, 0xade8cf3216b5448b, 0xa478e06a5e0866d8, 0x85c8f1ae99157736, 0x959902fed0282c8a, 0xa119145b0b91ffc5, 0xab2925c353aa2fe1, 0xae893737b0cdc5e4, 0xa88948b82b5f98e4, 0xad395a44cbc8520e, 0xaf296bdd9a7670b2, 0xa1797d829fde4e4f, 0x7ca98f33e47a22a2, 0xa749a0f170ca07b9, 0xa119b2bb4d53fe0c, 0x7c79c49182a3f090, 0xa579d674194bb8d4, 0x7829e86319e32323, 0xaad9fa5e8d07f29d, 0xa65a0c667b5de564, 0x9c6a1e7aed8eb8bb, 0x963a309bec4a2d33, 0xa2aa42c980460ad7, 0xa16a5503b23e255c, 0x650a674a8af46052, 0x9bca799e1330b358, 0xa58a8bfe53c12e58, 0x90fa9e6b5579fdbf, 0x889ab0e521356eba, 0xa81ac36bbfd3f379, 0x97ead5ff3a3c2774, 0x97aae89f995ad3ad, 0xa5aafb4ce622f2fe, 0xa21b0e07298db665, 0x94db20ce6c9a8952, 0xaedb33a2b84f15fa, 0xac1b468415b749b0, 0xa1cb59728de55939, 0x92ab6c6e29f1c52a, 0xad5b7f76f2fb5e46, 0xa24b928cf22749e3, 0xa08ba5b030a10649, 0xafcbb8e0b79a6f1e, 0x823bcc1e904bc1d2, 0xafcbdf69c3f3a206, 0xa08bf2c25bd71e08, 0xa89c06286141b33c, 0x811c199bdd85529c, 0xa48c2d1cd9fa652b, 0x9b4c40ab5fffd07a, 0x912c544778fafb22, 0x928c67f12e57d14b, 0xa86c7ba88988c932, 0x71ac8f6d9406e7b5, 0xaa0ca3405751c4da, 0x750cb720dcef9069, 0xac5ccb0f2e6d1674, 0xa88cdf0b555dc3f9, 0xa2fcf3155b5bab73, 0xa1ad072d4a07897b, 0x955d1b532b08c968, 0xa15d2f87080d89f1, 0x93dd43c8eacaa1d6, 0x82ed5818dcfba487, 0x5fed6c76e862e6d3, 0xa77d80e316c98397, 0x9a0d955d71ff6075, 0x9c2da9e603db3285, 0xa24dbe7cd63a8314, 0x92ddd321f301b460, 0xa1ade7d5641c0657, 0xa72dfc97337b9b5e, 0xadae11676b197d16, 0xa42e264614f5a128, 0xa30e3b333b16ee11, 0x839e502ee78b3ff6, 0xaa7e653924676d75, 0x92de7a51fbc74c83, 0xa77e8f7977cdb73f, 0xa0bea4afa2a490d9, 0x948eb9f4867cca6e, 0xa1becf482d8e67f0, 0x91cee4aaa2188510, 0x9dcefa1bee615a27, 0xa66f0f9c1cb64129, 0x93af252b376bba97, 0xacdf3ac948dd7273, 0x99df50765b6e4540, 0x9faf6632798844f8, 0xa12f7bfdad9cbe13, 0xaeef91d802243c88, 0x874fa7c1819e90d8, 0xacdfbdba3692d513, 0x62efd3c22b8f71f1, 0x74afe9d96b2a23d9)
43
120
44
- # max and min arguments
45
- MAX_EXP (:: Type{Float64} ) = 7.09782712893383996732e2 # log 2^1023*(2-2^-52)
46
- MAX_EXP (:: Type{Float32} ) = 88.72283905206835f0 # log 2^127 *(2-2^-23)
121
+ @inline function table_unpack (ind)
122
+ j = @inbounds J_TABLE[ind]
123
+ jU = reinterpret (Float64, JU_CONST | (j& JU_MASK))
124
+ jL = reinterpret (Float64, JL_CONST | (j>> 8 ))
125
+ return jU, jL
126
+ end
47
127
48
- # one less than the min exponent since we can sqeeze a bit more from the exp function
49
- MIN_EXP (:: Type{Float64} ) = - 7.451332191019412076235e2 # log 2^-1075
50
- MIN_EXP (:: Type{Float32} ) = - 103.97207708f0 # log 2^-150
128
+ # Method for Float64
129
+ # 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/512. Given x, base b,
130
+ # find r and integers k, j such that
131
+ # x = (k + j/256)*log(b, 2) + r, 0 <= j < 256, |r| <= log(b,2)/512.
132
+ #
133
+ # 2. Approximate b^r-1 by 3rd-degree minimax polynomial p_b(r) on the interval [-log(b,2)/512, log(b,2)/512].
134
+ # Since the bounds on r are very tight, this is sufficient to be accurate to floating point epsilon.
135
+ #
136
+ # 3. Scale back: b^x = 2^k * 2^(j/256) * (1 + p_b(r))
137
+ # Since the range of possible j is small, 2^(j/256) is stored for all possible values in slightly extended precision.
51
138
52
- @inline exp_kernel (x:: Float64 ) = @horner (x, 1.66666666666666019037e-1 ,
53
- - 2.77777777770155933842e-3 , 6.61375632143793436117e-5 ,
54
- - 1.65339022054652515390e-6 , 4.13813679705723846039e-8 )
139
+ # Method for Float32
140
+ # 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/2. Given x, base b,
141
+ # find r and integer N such that
142
+ # x = N*log(b, 2) + r, |r| <= log(b,2)/2.
143
+ #
144
+ # 2. Approximate b^r by 7th-degree minimax polynomial p_b(r) on the interval [-log(b,2)/2, log(b,2)/2].
145
+ # 3. Scale back: b^x = 2^N * p_b(r)
55
146
56
- @inline exp_kernel (x:: Float32 ) = @horner (x, 1.6666625440f-1 , - 2.7667332906f-3 )
147
+ # For both, a little extra care needs to be taken if b^r is subnormal.
148
+ # The solution is to do the scaling back in 2 steps as just messing with the exponent wouldn't work.
149
+ for (func, base) in (:exp2 => Val (2 ), :exp => Val (:ℯ ), :exp10 => Val (10 ))
150
+ @eval begin
151
+ ($ func)(x:: Real ) = ($ func)(float (x))
152
+ @inline function ($ func)(x:: T ) where T<: Float64
153
+ N_float = muladd (x, LogBo256INV ($ base, T), MAGIC_ROUND_CONST (T))
154
+ N = reinterpret (uinttype (T), N_float) % Int32
155
+ N_float -= MAGIC_ROUND_CONST (T) # N_float now equals round(x*LogBo256INV($base, T))
156
+ r = muladd (N_float, LogBo256U ($ base, T), x)
157
+ r = muladd (N_float, LogBo256L ($ base, T), r)
158
+ k = N >> 8
159
+ jU, jL = table_unpack (N& 255 + 1 )
160
+ small_part = muladd (jU, expm1b_kernel ($ base, r), jL) + jU
57
161
58
- # for values smaller than this threshold just use a Taylor expansion
59
- @eval exp_small_thres (:: Type{Float64} ) = $ (2.0 ^- 28 )
60
- @eval exp_small_thres (:: Type{Float32} ) = $ (2.0f0 ^- 13 )
162
+ if ! (abs (x) <= SUBNORM_EXP ($ base, T))
163
+ x >= MAX_EXP ($ base, T) && return Inf
164
+ x <= MIN_EXP ($ base, T) && return 0.0
165
+ if k <= - 53
166
+ # The UInt64 forces promotion. (Only matters for 32 bit systems.)
167
+ twopk = (k + UInt64 (53 )) << 52
168
+ return reinterpret (T, twopk + reinterpret (UInt64, small_part))* (2.0 ^- 53 )
169
+ end
170
+ end
171
+ twopk = Int64 (k) << 52
172
+ return reinterpret (T, twopk + reinterpret (Int64, small_part))
173
+ end
61
174
62
- """
175
+ @inline function ($ func)(x:: T ) where T<: Float32
176
+ N_float = round (x* LogBINV ($ base, T))
177
+ N = unsafe_trunc (Int32, N_float)
178
+ r = muladd (N_float, LogBU ($ base, T), x)
179
+ r = muladd (N_float, LogBL ($ base, T), r)
180
+ small_part = expb_kernel ($ base, r)
181
+ if ! (abs (x) <= SUBNORM_EXP ($ base, T))
182
+ x > MAX_EXP ($ base, T) && return Inf32
183
+ x < MIN_EXP ($ base, T) && return 0.0f0
184
+ if N<= Int32 (- 24 )
185
+ twopk = reinterpret (T, (N+ Int32 (151 )) << Int32 (23 ))
186
+ return (twopk* small_part)* (2f0 ^ (- 24 ))
187
+ end
188
+ N == exponent_max (T) && return small_part * T (2.0 ) * T (2.0 )^ (exponent_max (T) - 1 )
189
+ end
190
+ twopk = reinterpret (T, (N+ Int32 (127 )) << Int32 (23 ))
191
+ return twopk* small_part
192
+ end
193
+ end
194
+ end
195
+ @doc """
63
196
exp(x)
64
197
65
198
Compute the natural base exponential of `x`, in other words ``e^x``.
@@ -68,71 +201,4 @@ Compute the natural base exponential of `x`, in other words ``e^x``.
68
201
```jldoctest
69
202
julia> exp(1.0)
70
203
2.718281828459045
71
- ```
72
- """
73
- exp (x:: Real ) = exp (float (x))
74
- function exp (x:: T ) where T<: Union{Float32,Float64}
75
- xa = reinterpret (Unsigned, x) & ~ sign_mask (T)
76
- xsb = signbit (x)
77
-
78
- # filter out non-finite arguments
79
- if xa > reinterpret (Unsigned, MAX_EXP (T))
80
- if xa >= exponent_mask (T)
81
- xa & significand_mask (T) != 0 && return T (NaN )
82
- return xsb ? T (0.0 ) : T (Inf ) # exp(+-Inf)
83
- end
84
- x > MAX_EXP (T) && return T (Inf )
85
- x < MIN_EXP (T) && return T (0.0 )
86
- end
87
- # This implementation gives 2.7182818284590455 for exp(1.0) when T ==
88
- # Float64, which is well within the allowable error; however,
89
- # 2.718281828459045 is closer to the true value so we prefer that answer,
90
- # given that 1.0 is such an important argument value.
91
- if x == T (1.0 ) && T == Float64
92
- return 2.718281828459045235360
93
- end
94
- # compute approximation
95
- if xa > reinterpret (Unsigned, T (0.5 )* T (LN2)) # |x| > 0.5 log(2)
96
- # argument reduction
97
- if xa < reinterpret (Unsigned, T (1.5 )* T (LN2)) # |x| < 1.5 log(2)
98
- if xsb
99
- k = - 1
100
- hi = x + LN2U (T)
101
- lo = - LN2L (T)
102
- else
103
- k = 1
104
- hi = x - LN2U (T)
105
- lo = LN2L (T)
106
- end
107
- else
108
- n = round (T (LOG2_E)* x)
109
- k = unsafe_trunc (Int,n)
110
- hi = muladd (n, - LN2U (T), x)
111
- lo = n* LN2L (T)
112
- end
113
- # compute approximation on reduced argument
114
- r = hi - lo
115
- z = r* r
116
- p = r - z* exp_kernel (z)
117
- y = T (1.0 ) - ((lo - (r* p)/ (T (2.0 ) - p)) - hi)
118
- # scale back
119
- if k > - significand_bits (T)
120
- # multiply by 2.0 first to prevent overflow, which helps extends the range
121
- k == exponent_max (T) && return y * T (2.0 ) * T (2.0 )^ (exponent_max (T) - 1 )
122
- twopk = reinterpret (T, rem (exponent_bias (T) + k, uinttype (T)) << significand_bits (T))
123
- return y* twopk
124
- else
125
- # add significand_bits(T) + 1 to lift the range outside the subnormals
126
- twopk = reinterpret (T, rem (exponent_bias (T) + significand_bits (T) + 1 + k, uinttype (T)) << significand_bits (T))
127
- return y * twopk * T (2.0 )^ (- significand_bits (T) - 1 )
128
- end
129
- elseif xa < reinterpret (Unsigned, exp_small_thres (T)) # |x| < exp_small_thres
130
- # Taylor approximation for small values: exp(x) ≈ 1.0 + x
131
- return T (1.0 ) + x
132
- else
133
- # primary range with k = 0, so compute approximation directly
134
- z = x* x
135
- p = x - z* exp_kernel (z)
136
- return T (1.0 ) - ((x* p)/ (p - T (2.0 )) - x)
137
- end
138
- end
204
+ """ exp (x:: Real )
0 commit comments