28
28
#include " hsa/hsa_ext_amd.h"
29
29
#endif
30
30
31
+ #include " llvm/Frontend/Offloading/Utility.h"
32
+
31
33
#include < atomic>
32
34
#include < cstdio>
33
35
#include < cstdlib>
@@ -163,17 +165,13 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
163
165
hsa_queue_t *queue, rpc::Server &server,
164
166
const LaunchParameters ¶ms,
165
167
const char *kernel_name, args_t kernel_args,
166
- bool print_resource_usage) {
168
+ uint32_t wavefront_size, bool print_resource_usage) {
167
169
// Look up the kernel in the loaded executable.
168
170
hsa_executable_symbol_t symbol;
169
171
if (hsa_status_t err = hsa_executable_get_symbol_by_name (
170
172
executable, kernel_name, &dev_agent, &symbol))
171
173
return err;
172
174
173
- uint32_t wavefront_size = 0 ;
174
- if (hsa_status_t err = hsa_agent_get_info (
175
- dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
176
- handle_error (err);
177
175
// Retrieve different properties of the kernel symbol used for launch.
178
176
uint64_t kernel;
179
177
uint32_t args_size;
@@ -419,6 +417,16 @@ int load(int argc, const char **argv, const char **envp, void *image,
419
417
dev_agent, &coarsegrained_pool))
420
418
handle_error (err);
421
419
420
+ // The AMDGPU target can change its wavefront size. There currently isn't a
421
+ // good way to look this up through the HSA API so we use the LLVM interface.
422
+ uint16_t abi_version;
423
+ llvm::StringRef image_ref (reinterpret_cast <char *>(image), size);
424
+ llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> info_map;
425
+ if (llvm::Error err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage (
426
+ llvm::MemoryBufferRef (image_ref, " " ), info_map, abi_version)) {
427
+ handle_error (llvm::toString (std::move (err)).c_str ());
428
+ }
429
+
422
430
// Allocate fine-grained memory on the host to hold the pointer array for the
423
431
// copied argv and allow the GPU agent to access it.
424
432
auto allocator = [&](uint64_t size) -> void * {
@@ -448,10 +456,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
448
456
hsa_amd_memory_fill (dev_ret, 0 , /* count=*/ 1 );
449
457
450
458
// Allocate finegrained memory for the RPC server and client to share.
451
- uint32_t wavefront_size = 0 ;
452
- if ( hsa_status_t err = hsa_agent_get_info (
453
- dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
454
- handle_error (err) ;
459
+ uint32_t wavefront_size =
460
+ llvm::max_element (info_map, []( auto &&x, auto &&y) {
461
+ return x. second . WavefrontSize < y. second . WavefrontSize ;
462
+ })-> second . WavefrontSize ;
455
463
456
464
// Set up the RPC server.
457
465
void *rpc_buffer;
@@ -513,7 +521,6 @@ int load(int argc, const char **argv, const char **envp, void *image,
513
521
if (HSA_STATUS_SUCCESS ==
514
522
hsa_executable_get_symbol_by_name (executable, " __llvm_libc_clock_freq" ,
515
523
&dev_agent, &freq_sym)) {
516
-
517
524
void *host_clock_freq;
518
525
if (hsa_status_t err =
519
526
hsa_amd_memory_pool_allocate (finegrained_pool, sizeof (uint64_t ),
@@ -553,16 +560,17 @@ int load(int argc, const char **argv, const char **envp, void *image,
553
560
554
561
LaunchParameters single_threaded_params = {1 , 1 , 1 , 1 , 1 , 1 };
555
562
begin_args_t init_args = {argc, dev_argv, dev_envp};
556
- if (hsa_status_t err = launch_kernel (dev_agent, executable, kernargs_pool,
557
- coarsegrained_pool, queue, server ,
558
- single_threaded_params, " _begin.kd" ,
559
- init_args , print_resource_usage))
563
+ if (hsa_status_t err = launch_kernel (
564
+ dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
565
+ server, single_threaded_params, " _begin.kd" , init_args ,
566
+ info_map[ " _begin " ]. WavefrontSize , print_resource_usage))
560
567
handle_error (err);
561
568
562
569
start_args_t args = {argc, dev_argv, dev_envp, dev_ret};
563
570
if (hsa_status_t err = launch_kernel (
564
571
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
565
- server, params, " _start.kd" , args, print_resource_usage))
572
+ server, params, " _start.kd" , args, info_map[" _start" ].WavefrontSize ,
573
+ print_resource_usage))
566
574
handle_error (err);
567
575
568
576
void *host_ret;
@@ -580,10 +588,10 @@ int load(int argc, const char **argv, const char **envp, void *image,
580
588
int ret = *static_cast <int *>(host_ret);
581
589
582
590
end_args_t fini_args = {ret};
583
- if (hsa_status_t err = launch_kernel (dev_agent, executable, kernargs_pool,
584
- coarsegrained_pool, queue, server ,
585
- single_threaded_params, " _end.kd" ,
586
- fini_args , print_resource_usage))
591
+ if (hsa_status_t err = launch_kernel (
592
+ dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
593
+ server, single_threaded_params, " _end.kd" , fini_args ,
594
+ info_map[ " _end " ]. WavefrontSize , print_resource_usage))
587
595
handle_error (err);
588
596
589
597
if (hsa_status_t err = hsa_amd_memory_pool_free (rpc_buffer))
0 commit comments