Skip to content

Commit 89f375f

Browse files
authored
Merge pull request #169 from gboisse/master
Fixed runtime for CL platforms with no fp16 support
2 parents cb09647 + e85b2b8 commit 89f375f

File tree

4 files changed

+28
-10
lines changed

4 files changed

+28
-10
lines changed

Calc/inc/device.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ namespace Calc
5151
std::size_t local_mem_size;
5252
std::size_t max_alloc_size;
5353
std::size_t max_local_size;
54+
55+
bool has_fp16;
5456
};
5557

5658
// Main interface to control compute device

Calc/src/device_clw.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -262,6 +262,8 @@ namespace Calc
262262
spec.min_alignment = m_device.GetMinAlignSize();
263263
spec.max_alloc_size = m_device.GetMaxAllocSize();
264264
spec.max_local_size = m_device.GetMaxWorkGroupSize();
265+
266+
spec.has_fp16 = (m_device.GetExtensions().find("cl_khr_fp16") != std::string::npos);
265267
}
266268

267269
Buffer* DeviceClw::CreateBuffer(std::size_t size, std::uint32_t flags)

Calc/src/device_vkw.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ namespace Calc
133133
spec.max_alloc_size = static_cast< std::size_t >(hostMemory);
134134
spec.max_local_size = static_cast< std::size_t >(localMemory);
135135

136+
spec.has_fp16 = device->is_device_extension_supported("GL_AMD_gpu_shader_half_float");
136137
}
137138

138139
// Buffer creation and deletion

RadeonRays/src/intersector/intersector_lds.cpp

Lines changed: 23 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,12 @@ namespace RadeonRays
4949

5050
~Program()
5151
{
52-
executable->DeleteFunction(isect_func);
53-
executable->DeleteFunction(occlude_func);
54-
device->DeleteExecutable(executable);
52+
if (executable)
53+
{
54+
executable->DeleteFunction(isect_func);
55+
executable->DeleteFunction(occlude_func);
56+
device->DeleteExecutable(executable);
57+
}
5558
}
5659

5760
Calc::Device *device;
@@ -101,6 +104,9 @@ namespace RadeonRays
101104
buildopts.append("-D USE_SAFE_MATH ");
102105
#endif
103106

107+
Calc::DeviceSpec spec;
108+
m_device->GetSpec(spec);
109+
104110
#ifndef RR_EMBED_KERNELS
105111
if (device->GetPlatform() == Calc::Platform::kOpenCL)
106112
{
@@ -109,37 +115,44 @@ namespace RadeonRays
109115
int numheaders = sizeof(headers) / sizeof(const char *);
110116

111117
m_gpudata->bvh_prog.executable = m_device->CompileExecutable("../RadeonRays/src/kernels/CL/intersect_bvh2_lds.cl", headers, numheaders, buildopts.c_str());
112-
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable("../RadeonRays/src/kernels/CL/intersect_bvh2_lds_fp16.cl", headers, numheaders, buildopts.c_str());
118+
if (spec.has_fp16)
119+
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable("../RadeonRays/src/kernels/CL/intersect_bvh2_lds_fp16.cl", headers, numheaders, buildopts.c_str());
113120
}
114121
else
115122
{
116123
assert(device->GetPlatform() == Calc::Platform::kVulkan);
117124
m_gpudata->bvh_prog.executable = m_device->CompileExecutable("../RadeonRays/src/kernels/GLSL/bvh2.comp", nullptr, 0, buildopts.c_str());
118-
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable("../RadeonRays/src/kernels/GLSL/bvh2_fp16.comp", nullptr, 0, buildopts.c_str());
125+
if (spec.has_fp16)
126+
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable("../RadeonRays/src/kernels/GLSL/bvh2_fp16.comp", nullptr, 0, buildopts.c_str());
119127
}
120128
#else
121129
#if USE_OPENCL
122130
if (device->GetPlatform() == Calc::Platform::kOpenCL)
123131
{
124132
m_gpudata->bvh_prog.executable = m_device->CompileExecutable(g_intersect_bvh2_lds_opencl, std::strlen(g_intersect_bvh2_lds_opencl), buildopts.c_str());
125-
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable(g_intersect_bvh2_lds_fp16_opencl, std::strlen(g_intersect_bvh2_lds_fp16_opencl), buildopts.c_str());
133+
if (spec.has_fp16)
134+
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable(g_intersect_bvh2_lds_fp16_opencl, std::strlen(g_intersect_bvh2_lds_fp16_opencl), buildopts.c_str());
126135
}
127136
#endif
128137
#if USE_VULKAN
129138
if (device->GetPlatform() == Calc::Platform::kVulkan)
130139
{
131140
if (m_gpudata->bvh_prog.executable == nullptr)
132141
m_gpudata->bvh_prog.executable = m_device->CompileExecutable(g_bvh2_vulkan, std::strlen(g_bvh2_vulkan), buildopts.c_str());
133-
if (m_gpudata->qbvh_prog.executable == nullptr)
142+
if (m_gpudata->qbvh_prog.executable == nullptr && spec.has_fp16)
134143
m_gpudata->qbvh_prog.executable = m_device->CompileExecutable(g_bvh2_fp16_vulkan, std::strlen(g_bvh2_fp16_vulkan), buildopts.c_str());
135144
}
136145
#endif
137146
#endif
138147

139148
m_gpudata->bvh_prog.isect_func = m_gpudata->bvh_prog.executable->CreateFunction("intersect_main");
140-
m_gpudata->qbvh_prog.isect_func = m_gpudata->qbvh_prog.executable->CreateFunction("intersect_main");
141149
m_gpudata->bvh_prog.occlude_func = m_gpudata->bvh_prog.executable->CreateFunction("occluded_main");
142-
m_gpudata->qbvh_prog.occlude_func = m_gpudata->qbvh_prog.executable->CreateFunction("occluded_main");
150+
151+
if (m_gpudata->qbvh_prog.executable)
152+
{
153+
m_gpudata->qbvh_prog.isect_func = m_gpudata->qbvh_prog.executable->CreateFunction("intersect_main");
154+
m_gpudata->qbvh_prog.occlude_func = m_gpudata->qbvh_prog.executable->CreateFunction("occluded_main");
155+
}
143156
}
144157

145158
void IntersectorLDS::Process(const World &world)
@@ -166,7 +179,7 @@ namespace RadeonRays
166179
#if 0
167180
if (type && type->AsString() == "qbvh")
168181
{
169-
use_qbvh = true;
182+
use_qbvh = (m_gpudata->qbvh_prog.executable != nullptr);
170183
}
171184
#endif
172185

0 commit comments

Comments
 (0)