|
1 | 1 | import MKL_jll
|
2 | 2 |
|
| 3 | +""" |
| 4 | + struct VMLAccuracy |
| 5 | +
|
| 6 | +See [`VML_LA`](@ref), [`VML_HA`](@ref), [`VML_EP`](@ref). |
| 7 | +""" |
3 | 8 | struct VMLAccuracy
|
4 | 9 | mode::UInt
|
5 | 10 | end
|
| 11 | +Base.show(io::IO, m::VMLAccuracy) = print(io, m == VML_LA ? "VML_LA" : |
| 12 | + m == VML_HA ? "VML_HA" : "VML_EP") |
| 13 | +# mkl\include\mkl_vml_defines.h |
| 14 | +# VML_HA - when VML_HA is set, high accuracy VML functions are called |
| 15 | +# VML_LA - when VML_LA is set, low accuracy VML functions are called |
| 16 | +# VML_EP - when VML_EP is set, enhanced performance VML functions are called |
| 17 | +# NOTE: VML_HA, VML_LA and VML_EP must not be used in combination |
| 18 | +""" |
| 19 | + VML_LA :: VMLAccuracy |
6 | 20 |
|
| 21 | +Low Accuracy (LA), which improves performance by reducing accuracy of the two least significant bits. |
| 22 | +""" |
7 | 23 | const VML_LA = VMLAccuracy(0x00000001)
|
| 24 | +""" |
| 25 | + VML_HA :: VMLAccuracy |
| 26 | +
|
| 27 | +High Accuracy (HA), the default mode. Precision to 1 ulp. |
| 28 | +""" |
8 | 29 | const VML_HA = VMLAccuracy(0x00000002)
|
| 30 | +""" |
| 31 | + VML_EP :: VMLAccuracy |
| 32 | +
|
| 33 | +Enhanced Performance (EP), which provides better performance at the cost of significantly reduced accuracy. |
| 34 | +Approximately half of the bits in the mantissa are correct. |
| 35 | +""" |
9 | 36 | const VML_EP = VMLAccuracy(0x00000003)
|
10 | 37 |
|
11 |
| -Base.show(io::IO, m::VMLAccuracy) = print(io, m == VML_LA ? "VML_LA" : |
12 |
| - m == VML_HA ? "VML_HA" : "VML_EP") |
13 |
| - |
| 38 | + |
| 39 | +""" |
| 40 | + struct VMLFastDenormal |
| 41 | +
|
| 42 | +See [`VML_DENORMAL_FAST`](@ref), [`VML_DENORMAL_ACCURATE`](@ref). |
| 43 | +""" |
| 44 | +struct VMLFastDenormal |
| 45 | + mode::UInt |
| 46 | +end |
| 47 | +Base.show(io::IO, m::VMLFastDenormal) = print(io, m == VML_DENORMAL_FAST ? "VML_DENORMAL_FAST" : "VML_DENORMAL_ACCURATE") |
| 48 | +# mkl\include\mkl_vml_defines.h |
| 49 | +# FTZ & DAZ mode macros |
| 50 | +# VML_FTZDAZ_ON - FTZ & DAZ MXCSR mode enabled |
| 51 | +# for faster (sub)denormal values processing |
| 52 | +# VML_FTZDAZ_OFF - FTZ & DAZ MXCSR mode disabled |
| 53 | +# for accurate (sub)denormal values processing |
| 54 | +""" |
| 55 | + VML_DENORMAL_FAST :: VMLFastDenormal |
| 56 | +
|
| 57 | +Designed to improve the performance of computations that involve denormalized numbers at the cost of reasonable accuracy loss. |
| 58 | +This mode changes the numeric behavior of the functions: denormalized input values are treated as zeros and denormalized results |
| 59 | +are flushed to zero. Accuracy loss may occur if input and/or output values are close to denormal range. |
| 60 | +""" |
| 61 | +const VML_DENORMAL_FAST = VMLFastDenormal(0x00280000) |
| 62 | +""" |
| 63 | + VML_DENORMAL_ACCURATE :: VMLFastDenormal |
| 64 | +
|
| 65 | +Standard handling of computations that involve denormalized numbers. |
| 66 | +""" |
| 67 | +const VML_DENORMAL_ACCURATE = VMLFastDenormal(0x00140000) |
| 68 | + |
| 69 | + |
| 70 | +struct VMLFpuMode |
| 71 | + mode::UInt |
| 72 | +end |
| 73 | +Base.show(io::IO, m::VMLFpuMode) = print(io, m == VML_FPU_DEFAULT ? "VML_FPU_DEFAULT" : |
| 74 | + m == VML_FPU_FLOAT32 ? "VML_FPU_FLOAT32" : |
| 75 | + m == VML_FPU_FLOAT64 ? "VML_FPU_FLOAT64" : "VML_FPU_RESTORE") |
| 76 | +# mkl\include\mkl_vml_defines.h |
| 77 | +# SETTING OPTIMAL FLOATING-POINT PRECISION AND ROUNDING MODE |
| 78 | +# Definitions below are to set optimal floating-point control word |
| 79 | +# (precision and rounding mode). |
| 80 | +# |
| 81 | +# For their correct work, VML functions change floating-point precision and |
| 82 | +# rounding mode (if necessary). Since control word changing is typically |
| 83 | +# expensive operation, it is recommended to set precision and rounding mode |
| 84 | +# to optimal values before VML function calls. |
| 85 | +# |
| 86 | +# VML_FLOAT_CONSISTENT - use this value if the calls are typically to single |
| 87 | +# precision VML functions |
| 88 | +# VML_DOUBLE_CONSISTENT - use this value if the calls are typically to double |
| 89 | +# precision VML functions |
| 90 | +# VML_RESTORE - restore original floating-point precision and |
| 91 | +# rounding mode |
| 92 | +# VML_DEFAULT_PRECISION - use default (current) floating-point precision and |
| 93 | +# rounding mode |
| 94 | +# NOTE: VML_FLOAT_CONSISTENT, VML_DOUBLE_CONSISTENT, VML_RESTORE and |
| 95 | +# VML_DEFAULT_PRECISION must not be used in combination |
| 96 | +const VML_FPU_DEFAULT = VMLFpuMode(0x00000000) # VML_DEFAULT_PRECISION |
| 97 | +const VML_FPU_FLOAT32 = VMLFpuMode(0x00000010) # VML_FLOAT_CONSISTENT |
| 98 | +const VML_FPU_FLOAT64 = VMLFpuMode(0x00000020) # VML_DOUBLE_CONSISTENT |
| 99 | +const VML_FPU_RESTORE = VMLFpuMode(0x00000030) # VML_RESTORE |
| 100 | + |
| 101 | +# mkl\include\mkl_vml_defines.h |
| 102 | +# ACCURACY, FLOATING-POINT CONTROL, FTZDAZ AND ERROR HANDLING MASKS |
| 103 | +# Accuracy, floating-point and error handling control are packed in |
| 104 | +# the VML mode variable. Macros below are useful to extract accuracy and/or |
| 105 | +# floating-point control and/or error handling control settings. |
| 106 | +# |
| 107 | +# VML_ACCURACY_MASK - extract accuracy bits |
| 108 | +# VML_FPUMODE_MASK - extract floating-point control bits |
| 109 | +# VML_ERRMODE_MASK - extract error handling control bits |
| 110 | +# (including error callback bits) |
| 111 | +# VML_ERRMODE_STDHANDLER_MASK - extract error handling control bits |
| 112 | +# (not including error callback bits) |
| 113 | +# VML_ERRMODE_CALLBACK_MASK - extract error callback bits |
| 114 | +# VML_NUM_THREADS_OMP_MASK - extract OpenMP(R) number of threads mode bits |
| 115 | +# VML_FTZDAZ_MASK - extract FTZ & DAZ bits |
| 116 | +# VML_TRAP_EXCEPTIONS_MASK - extract exception trap bits |
| 117 | +const VML_ACCURACY_MASK = 0x0000000F |
| 118 | +const VML_FPUMODE_MASK = 0x000000F0 |
| 119 | +const VML_ERRMODE_MASK = 0x0000FF00 |
| 120 | +const VML_ERRMODE_STDHANDLER_MASK = 0x00002F00 |
| 121 | +const VML_ERRMODE_CALLBACK_MASK = 0x00001000 |
| 122 | +const VML_NUM_THREADS_OMP_MASK = 0x00030000 |
| 123 | +const VML_FTZDAZ_MASK = 0x003C0000 |
| 124 | +const VML_TRAP_EXCEPTIONS_MASK = 0x0F000000 |
| 125 | + |
| 126 | +# https://www.intel.com/content/www/us/en/develop/documentation/onemkl-developer-reference-c/top/vector-mathematical-functions/vm-service-functions.html |
14 | 127 | vml_get_mode() = ccall((:vmlGetMode, MKL_jll.libmkl_rt), Cuint, ())
|
15 | 128 | vml_set_mode(mode::Integer) = (ccall((:vmlSetMode, MKL_jll.libmkl_rt), Cuint, (UInt,), mode); nothing)
|
16 | 129 |
|
17 |
| -vml_set_accuracy(m::VMLAccuracy) = vml_set_mode((vml_get_mode() & ~0x03) | m.mode) |
18 |
| -vml_get_accuracy() = VMLAccuracy(vml_get_mode() & 0x3) |
| 130 | +""" |
| 131 | + vml_set_accuracy(m::VMLAccuracy) |
| 132 | +
|
| 133 | +Set the current accuracy mode. See [`VML_LA`](@ref), [`VML_HA`](@ref), [`VML_EP`](@ref). |
| 134 | +""" |
| 135 | +vml_set_accuracy(m::VMLAccuracy) = vml_set_mode((vml_get_mode() & ~VML_ACCURACY_MASK) | m.mode) |
| 136 | +""" |
| 137 | + vml_get_accuracy() :: VMLAccuracy |
| 138 | +
|
| 139 | +Get the current accuracy mode. See [`VML_LA`](@ref), [`VML_HA`](@ref), [`VML_EP`](@ref). |
| 140 | +""" |
| 141 | +vml_get_accuracy() = VMLAccuracy(vml_get_mode() & VML_ACCURACY_MASK) |
| 142 | + |
| 143 | +""" |
| 144 | + vml_set_denormalmode(m::VMLFastDenormal) |
| 145 | +
|
| 146 | +Set the current mode of denormal handling. See [`VML_DENORMAL_FAST`](@ref), [`VML_DENORMAL_ACCURATE`](@ref). |
| 147 | +""" |
| 148 | +vml_set_denormalmode(m::VMLFastDenormal) = vml_set_mode((vml_get_mode() & ~VML_FTZDAZ_MASK) | m.mode) |
| 149 | +""" |
| 150 | + vml_get_denormalmode() :: VMLFastDenormal |
| 151 | +
|
| 152 | +Get the current mode of denormal handling. See [`VML_DENORMAL_FAST`](@ref), [`VML_DENORMAL_ACCURATE`](@ref). |
| 153 | +""" |
| 154 | +vml_get_denormalmode() = VMLFastDenormal(vml_get_mode() & VML_FTZDAZ_MASK) |
| 155 | + |
| 156 | +# Ignored with MKL 2022 on i7-5930k; was useful once upon a time. |
| 157 | +vml_set_fpumode(m::VMLFpuMode) = vml_set_mode((vml_get_mode() & ~VML_FPUMODE_MASK) | m.mode) |
| 158 | +vml_get_fpumode() = VMLFpuMode(vml_get_mode() & VML_FPUMODE_MASK) |
| 159 | + |
| 160 | +# ----------------------------------------------------------------------------------------------- |
| 161 | + |
| 162 | +# https://www.intel.com/content/www/us/en/develop/documentation/onemkl-developer-reference-c/top/support-functions/threading-control.html |
| 163 | +# |
| 164 | +# See: mkl\include\mkl_service.h |
| 165 | +# _Mkl_Api(int,MKL_Domain_Set_Num_Threads,(int nth, int MKL_DOMAIN)) |
| 166 | +# _Mkl_Api(int,MKL_Domain_Get_Max_Threads,(int MKL_DOMAIN)) |
| 167 | +# #define mkl_domain_set_num_threads MKL_Domain_Set_Num_Threads |
| 168 | +# #define mkl_domain_get_max_threads MKL_Domain_Get_Max_Threads |
| 169 | +# |
| 170 | +# See: mkl\include\mkl_types.h |
| 171 | +# define MKL_DOMAIN_ALL 0 |
| 172 | +# define MKL_DOMAIN_BLAS 1 |
| 173 | +# define MKL_DOMAIN_FFT 2 |
| 174 | +const MKL_DOMAIN_VML = 0x3 |
| 175 | +# define MKL_DOMAIN_PARDISO 4 |
| 176 | + |
| 177 | +""" |
| 178 | + vml_get_max_threads() :: Int |
| 179 | +
|
| 180 | +Maximum number of threads that VML may use. By default, or after a call to `vml_set_num_threads(0)`, |
| 181 | +should return the number of cores available to VML. |
| 182 | +""" |
| 183 | +vml_get_max_threads() = Int(ccall((:MKL_Domain_Get_Max_Threads, MKL_jll.libmkl_rt), Cint, (Cint,), MKL_DOMAIN_VML)) |
| 184 | +""" |
| 185 | + vml_set_num_threads(numthreads::Int) :: Bool |
| 186 | +
|
| 187 | +Set the maximum number of threads that VML may use. Use `numthreads=0` to restore the default. |
| 188 | +Return `true` if the operation completed successfully. |
| 189 | +""" |
| 190 | +vml_set_num_threads(numthreads::Int) = Bool(ccall((:MKL_Domain_Set_Num_Threads, MKL_jll.libmkl_rt), Cuint, (Cint,Cint), numthreads, MKL_DOMAIN_VML)) |
| 191 | + |
| 192 | +# See: mkl\include\mkl_service.h |
| 193 | +# _Mkl_Api(double,MKL_Get_Cpu_Frequency,(void)) /* Gets CPU frequency in GHz */ |
| 194 | +# _Mkl_Api(double,MKL_Get_Max_Cpu_Frequency,(void)) /* Gets max CPU frequency in GHz */ |
| 195 | +# #define mkl_get_cpu_frequency MKL_Get_Cpu_Frequency |
| 196 | +# #define mkl_get_max_cpu_frequency MKL_Get_Max_Cpu_Frequency |
| 197 | +# |
| 198 | +# _Mkl_Api(void,MKL_Get_Cpu_Clocks,(unsigned MKL_INT64 *)) /* Gets CPU clocks */ |
| 199 | +# _Mkl_Api(double,MKL_Get_Clocks_Frequency,(void)) /* Gets clocks frequency in GHz */ |
| 200 | +# #define mkl_get_cpu_clocks MKL_Get_Cpu_Clocks |
| 201 | +# #define mkl_get_clocks_frequency MKL_Get_Clocks_Frequency |
| 202 | + |
| 203 | +""" |
| 204 | + vml_get_cpu_frequency() :: Float64 |
| 205 | +
|
| 206 | +Current CPU frequency in GHz; may be lower or higher than [`vml_get_max_cpu_frequency`](@ref). |
| 207 | +""" |
| 208 | +vml_get_cpu_frequency() = ccall((:MKL_Get_Cpu_Frequency, MKL_jll.libmkl_rt), Cdouble, ()) |
| 209 | +""" |
| 210 | + vml_get_max_cpu_frequency() :: Float64 |
| 211 | +
|
| 212 | +Official CPU frequency in GHz, as per package specification. See also [`vml_get_cpu_frequency`](@ref). |
| 213 | +""" |
| 214 | +vml_get_max_cpu_frequency() = ccall((:MKL_Get_Max_Cpu_Frequency, MKL_jll.libmkl_rt), Cdouble, ()) |
| 215 | + |
| 216 | +# ----------------------------------------------------------------------------------------------- |
| 217 | + |
| 218 | +# mkl\include\mkl_vml_defines.h |
| 219 | +# ERROR STATUS MACROS |
| 220 | +# VML_STATUS_OK - no errors |
| 221 | +# VML_STATUS_BADSIZE - array dimension is not positive |
| 222 | +# VML_STATUS_BADMEM - invalid pointer passed |
| 223 | +# VML_STATUS_ERRDOM - at least one of arguments is out of function domain |
| 224 | +# VML_STATUS_SING - at least one of arguments caused singularity |
| 225 | +# VML_STATUS_OVERFLOW - at least one of arguments caused overflow |
| 226 | +# VML_STATUS_UNDERFLOW - at least one of arguments caused underflow |
| 227 | +# VML_STATUS_ACCURACYWARNING - function doesn't support set accuracy mode, |
| 228 | +# lower accuracy mode was used instead |
| 229 | +const VML_STATUS_OK = 0 |
| 230 | +const VML_STATUS_BADSIZE = -1 |
| 231 | +const VML_STATUS_BADMEM = -2 |
| 232 | +const VML_STATUS_ERRDOM = 1 |
| 233 | +const VML_STATUS_SING = 2 |
| 234 | +const VML_STATUS_OVERFLOW = 3 |
| 235 | +const VML_STATUS_UNDERFLOW = 4 |
| 236 | +const VML_STATUS_ACCURACYWARNING = 1000 |
19 | 237 |
|
20 | 238 | function vml_check_error()
|
21 | 239 | vml_error = ccall((:vmlClearErrStatus, MKL_jll.libmkl_rt), Cint, ())
|
22 |
| - if vml_error != 0 |
23 |
| - if vml_error == 1 |
| 240 | + if vml_error != VML_STATUS_OK |
| 241 | + if vml_error == VML_STATUS_ERRDOM |
24 | 242 | throw(DomainError(-1, "This function does not support arguments outside its domain"))
|
25 |
| - elseif vml_error == 2 || vml_error == 3 || vml_error == 4 |
| 243 | + elseif vml_error == VML_STATUS_SING || vml_error == VML_STATUS_OVERFLOW || vml_error == VML_STATUS_UNDERFLOW |
26 | 244 | # Singularity, overflow, or underflow
|
27 | 245 | # I don't think Base throws on these
|
28 |
| - elseif vml_error == 1000 |
| 246 | + elseif vml_error == VML_STATUS_ACCURACYWARNING |
29 | 247 | warn("IntelVectorMath does not support $(vml_get_accuracy); lower accuracy used instead")
|
30 |
| - else |
| 248 | + else # VML_STATUS_BADSIZE or VML_STATUS_BADMEM |
31 | 249 | error("an unexpected error occurred in IntelVectorMath ($vml_error)")
|
32 | 250 | end
|
33 | 251 | end
|
|
0 commit comments