@@ -48,6 +48,8 @@ VERSION = 0.3.6.dev
48
48
# HOSTCC = gcc
49
49
50
50
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
51
+ # Please note that AVX is not available on 32-bit.
52
+ # Setting BINARY=32 disables AVX/AVX2/AVX-512.
51
53
# BINARY=64
52
54
53
55
# About threaded BLAS. It will be automatically detected if you don't
@@ -57,7 +59,7 @@ VERSION = 0.3.6.dev
57
59
# USE_THREAD = 0
58
60
59
61
# If you're going to use this library with OpenMP, please comment it in.
60
- # This flag is always set for POWER8. Don't modify the flag
62
+ # This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
61
63
# USE_OPENMP = 1
62
64
63
65
# The OpenMP scheduler to use - by default this is "static" and you
@@ -68,36 +70,45 @@ VERSION = 0.3.6.dev
68
70
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
69
71
# CCOMMON_OPT += -DOMP_SCHED=dynamic
70
72
71
- # You can define maximum number of threads. Basically it should be
72
- # less than actual number of cores. If you don't specify one, it's
73
- # automatically detected by the the script.
73
+ # You can define the maximum number of threads. Basically it should be less
74
+ # than or equal to the number of CPU threads. If you don't specify one, it's
75
+ # automatically detected by the build system.
76
+ # If SMT (a.k.a. HT) is enabled on the system, it may or may not be beneficial to
77
+ # restrict NUM_THREADS to the number of physical cores. By default, the automatic
78
+ # detection includes logical CPUs, thus allowing the use of SMT.
79
+ # Users may opt at runtime to use fewer than NUM_THREADS threads.
80
+ #
81
+ # Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
82
+ # value (e.g. 32-256) if you expect your users to use that many threads. Due to the way
83
+ # some internal structures are allocated, using a large NUM_THREADS value has a RAM
84
+ # footprint penalty, even if users reduce the actual number of threads at runtime.
74
85
# NUM_THREADS = 24
75
86
76
87
# If you have enabled USE_OPENMP and your application would call
77
- # OpenBLAS's calculation API from multi threads, please comment it in.
78
- # This flag defines how many instances of OpenBLAS's calculation API can
79
- # actually run in parallel. If more threads call OpenBLAS's calculation API,
88
+ # OpenBLAS's calculation API from multiple threads, please comment this in.
89
+ # This flag defines how many instances of OpenBLAS's calculation API can actually
90
+ # run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
80
91
# they need to wait for the preceding API calls to finish or risk data corruption.
81
92
# NUM_PARALLEL = 2
82
93
83
- # if you don't need to install the static library, please comment it in.
94
+ # If you don't need to install the static library, please comment this in.
84
95
# NO_STATIC = 1
85
96
86
- # if you don't need generate the shared library, please comment it in.
97
+ # If you don't need to generate the shared library, please comment this in.
87
98
# NO_SHARED = 1
88
99
89
- # If you don't need CBLAS interface, please comment it in.
100
+ # If you don't need the CBLAS interface, please comment this in.
90
101
# NO_CBLAS = 1
91
102
92
- # If you only want CBLAS interface without installing Fortran compiler,
93
- # please comment it in.
103
+ # If you only want the CBLAS interface without installing a Fortran compiler,
104
+ # please comment this in.
94
105
# ONLY_CBLAS = 1
95
106
96
- # If you don't need LAPACK, please comment it in.
97
- # If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
107
+ # If you don't need LAPACK, please comment this in.
108
+ # If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
98
109
# NO_LAPACK = 1
99
110
100
- # If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
111
+ # If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
101
112
# NO_LAPACKE = 1
102
113
103
114
# Build LAPACK Deprecated functions since LAPACK 3.6.0
@@ -106,7 +117,7 @@ BUILD_LAPACK_DEPRECATED = 1
106
117
# Build RecursiveLAPACK on top of LAPACK
107
118
# BUILD_RELAPACK = 1
108
119
109
- # If you want to use legacy threaded Level 3 implementation.
120
+ # If you want to use the legacy threaded Level 3 implementation.
110
121
# USE_SIMPLE_THREADED_LEVEL3 = 1
111
122
112
123
# If you want to use the new, still somewhat experimental code that uses
@@ -116,19 +127,27 @@ BUILD_LAPACK_DEPRECATED = 1
116
127
# USE_TLS = 1
117
128
118
129
# If you want to drive whole 64bit region by BLAS. Not all Fortran
119
- # compiler supports this. It's safe to keep comment it out if you
120
- # are not sure( equivalent to "-i8" option).
130
+ # compilers support this. It's safe to keep this commented out if you
131
+ # are not sure. (This is equivalent to the "-i8" ifort option.)
121
132
# INTERFACE64 = 1
122
133
123
134
# Unfortunately, most kernels won't give us a high-quality buffer.
124
135
# BLAS tries to find the best region before entering main function,
125
136
# but it will consume time. If you don't like it, you can disable it.
126
137
NO_WARMUP = 1
127
138
128
- # If you want to disable CPU/Memory affinity on Linux.
139
+ # Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
140
+ # This feature is only implemented on Linux, and is always disabled on other platforms.
141
+ # Enabling affinity handling may improve performance, especially on NUMA systems, but
142
+ # it may conflict with certain applications that also try to manage affinity.
143
+ # This conflict can result in threads of the application calling OpenBLAS ending up locked
144
+ # to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
145
+ # For this reason, affinity handling is disabled by default. It can be safely enabled if nothing
146
+ # else modifies affinity settings.
147
+ # Note: enabling affinity has been known to cause problems with NumPy and R.
129
148
NO_AFFINITY = 1
130
149
131
- # if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
150
+ # If you are compiling for Linux and you have more than 16 NUMA nodes or more than 256 CPUs
132
151
# BIGNUMA = 1
133
152
134
153
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
0 commit comments