You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
inline vector<Device_Info> get_devices(const bool print_info=true) { // returns a vector of all available OpenCL devices
179
-
#if defined(_WIN32)
180
-
(void)_putenv((char*)"CL_CONFIG_CPU_FORCE_MAX_MEM_ALLOC_SIZE=17179869183GB"); // fix maximum buffer allocation size limit in Intel CPU Runtime for OpenCL, 2^34-1 is max non-overflowing value
181
-
(void)_putenv((char*)"GPU_SINGLE_ALLOC_PERCENT=100"); // fix maximum buffer allocation size limit for AMD GPUs
182
-
#elif defined(__linux__)
183
-
(void) putenv((char*)"CL_CONFIG_CPU_FORCE_MAX_MEM_ALLOC_SIZE=17179869183GB"); // fix maximum buffer allocation size limit in Intel CPU Runtime for OpenCL, 2^34-1 is max non-overflowing value
184
-
(void) putenv((char*)"GPU_SINGLE_ALLOC_PERCENT=100"); // fix maximum buffer allocation size limit for AMD GPUs
185
-
#endif // Linux
182
+
set_environment_variable((char*)"GPU_SINGLE_ALLOC_PERCENT=100"); // fix maximum buffer allocation size limit for AMD GPUs
183
+
set_environment_variable((char*)"CL_CONFIG_CPU_FORCE_MAX_MEM_ALLOC_SIZE=17179869183GB"); // fix maximum buffer allocation size limit in Intel CPU Runtime for OpenCL, 2^34-1 is max non-overflowing value
186
184
vector<Device_Info> devices; // get all devices of all platforms
187
185
vector<cl::Platform> cl_platforms; // get all platforms (drivers)
188
186
cl::Platform::get(&cl_platforms);
@@ -248,17 +246,18 @@ class Device {
248
246
cl::CommandQueue cl_queue;
249
247
bool exists = false;
250
248
inline string enable_device_capabilities() const { return // enable FP64/FP16 capabilities if available
"\n #pragma OPENCL EXTENSION cl_khr_fp64 : enable"// make sure cl_khr_fp64 extension is enabled
254
-
"\n #endif"
255
-
"\n #ifdef cl_khr_fp16"
256
-
"\n #pragma OPENCL EXTENSION cl_khr_fp16 : enable"// make sure cl_khr_fp16 extension is enabled
257
-
"\n #endif"
258
-
"\n #ifdef cl_khr_int64_base_atomics"
259
-
"\n #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable"// make sure cl_khr_int64_base_atomics extension is enabled
260
-
"\n #endif"
261
-
+(info.legacy_gpu_fma_patch ? "\n #define fma(a, b, c) ((a)*(b)+(c))" : "") // some old GPUs have terrible fma performance, so replace with a*b+c
249
+
string(info.patch_nvidia_fp16 ? "\n #define cl_khr_fp16" : "")+ // Nvidia Pascal and newer GPUs with driver>=520.00 don't report cl_khr_fp16, but do support basic FP16 arithmetic
250
+
string(info.patch_legacy_gpu_fma ? "\n #define fma(a, b, c) ((a)*(b)+(c))" : "")+ // some old GPUs have terrible fma performance, so replace with a*b+c
if(device.info.memory_used>device.info.memory) print_error("Device \""+device.info.name+"\" does not have enough memory. Allocating another "+to_string((uint)(capacity()/1048576ull))+" MB would use a total of "+to_string(device.info.memory_used)+" MB / "+to_string(device.info.memory)+" MB.");
322
321
int error = 0;
323
-
device_buffer = cl::Buffer(device.get_cl_context(), CL_MEM_READ_WRITE|((int)device.info.intel_gpu_above_4gb_patch<<23), capacity(), nullptr, &error); // for Intel GPUs, set flag CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL = (1<<23)
322
+
device_buffer = cl::Buffer(device.get_cl_context(), CL_MEM_READ_WRITE|((int)device.info.patch_intel_gpu_above_4gb<<23), capacity(), nullptr, &error); // for Intel GPUs, set flag CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL = (1<<23)
324
323
if(error==-61) print_error("Memory size is too large at "+to_string((uint)(capacity()/1048576ull))+" MB. Device \""+device.info.name+"\" accepts a maximum buffer size of "+to_string(device.info.max_global_buffer)+" MB.");
325
324
else if(error) print_error("Device buffer allocation failed with error code "+to_string(error)+".");
0 commit comments