Skip to content

Commit 4bae4e1

Browse files
JonChesterfield authored and memfrob committed
[libomptarget][amdgpu] Drop env variables
Use the same debug print as the rest of the libomptarget plugins, with the same environment control. Also drop the max queue size debugging hook, as I don't believe it is still in use; it can be brought back near the rest of the env handling in rtl.cpp if someone objects.

That makes most of rt.h and all of utils.cpp unused. Clean that up and simplify control flow in a couple of places.

Behaviour change: debug prints that used to use the old environment variable now use the new one and print in a slightly different format, and the max queue size variable is removed.

Reviewed By: pdhaliwal

Differential Revision: https://reviews.llvm.org/D108784
1 parent 423729d commit 4bae4e1

File tree

8 files changed

+69
-197
lines changed

8 files changed

+69
-197
lines changed

openmp/libomptarget/plugins/amdgpu/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ add_library(omptarget.rtl.amdgpu SHARED
6767
impl/data.cpp
6868
impl/get_elf_mach_gfx_name.cpp
6969
impl/system.cpp
70-
impl/utils.cpp
7170
impl/msgpack.cpp
7271
src/rtl.cpp
7372
${LIBOMPTARGET_EXTRA_SOURCE}

openmp/libomptarget/plugins/amdgpu/impl/data.cpp

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,21 @@
1717
using core::TaskImpl;
1818

1919
namespace core {
20-
21-
hsa_status_t Runtime::HostMalloc(void **ptr, size_t size,
22-
hsa_amd_memory_pool_t MemoryPool) {
20+
namespace Runtime {
21+
hsa_status_t HostMalloc(void **ptr, size_t size,
22+
hsa_amd_memory_pool_t MemoryPool) {
2323
hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
24-
DEBUG_PRINT("Malloced %p\n", *ptr);
25-
24+
DP("Malloced %p\n", *ptr);
2625
if (err == HSA_STATUS_SUCCESS) {
2726
err = core::allow_access_to_all_gpu_agents(*ptr);
2827
}
29-
return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
28+
return err;
3029
}
3130

32-
hsa_status_t Runtime::Memfree(void *ptr) {
31+
hsa_status_t Memfree(void *ptr) {
3332
hsa_status_t err = hsa_amd_memory_pool_free(ptr);
34-
DEBUG_PRINT("Freed %p\n", ptr);
35-
36-
return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
33+
DP("Freed %p\n", ptr);
34+
return err;
3735
}
38-
36+
} // namespace Runtime
3937
} // namespace core

openmp/libomptarget/plugins/amdgpu/impl/impl.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8-
#include "impl_runtime.h"
98
#include "hsa_api.h"
9+
#include "impl_runtime.h"
1010
#include "internal.h"
1111
#include "rt.h"
1212
#include <memory>
@@ -32,7 +32,7 @@ static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
3232
hsa_signal_value_t got = init;
3333
while (got == init) {
3434
got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init,
35-
UINT64_MAX, ATMI_WAIT_STATE);
35+
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
3636
}
3737

3838
if (got != success) {
@@ -64,8 +64,7 @@ hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
6464
void *tempHostPtr;
6565
hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
6666
if (ret != HSA_STATUS_SUCCESS) {
67-
DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
68-
size);
67+
DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
6968
return ret;
7069
}
7170
std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
@@ -94,8 +93,7 @@ hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *dest,
9493
void *tempHostPtr;
9594
hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
9695
if (ret != HSA_STATUS_SUCCESS) {
97-
DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
98-
size);
96+
DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
9997
return ret;
10098
}
10199
std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);

openmp/libomptarget/plugins/amdgpu/impl/internal.h

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@
2424
#include "hsa_api.h"
2525

2626
#include "impl_runtime.h"
27-
#include "rt.h"
27+
28+
#ifndef TARGET_NAME
29+
#error "Missing TARGET_NAME macro"
30+
#endif
31+
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
32+
#include "Debug.h"
2833

2934
#define MAX_NUM_KERNELS (1024 * 16)
3035

@@ -41,34 +46,6 @@ typedef struct impl_implicit_args_s {
4146
unsigned long kernarg_template_ptr;
4247
} impl_implicit_args_t;
4348

44-
extern "C" {
45-
46-
#ifdef DEBUG
47-
#define DEBUG_PRINT(fmt, ...) \
48-
if (core::Runtime::getInstance().getDebugMode()) { \
49-
fprintf(stderr, "[%s:%d] " fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
50-
}
51-
#else
52-
#define DEBUG_PRINT(...) \
53-
do { \
54-
} while (false)
55-
#endif
56-
57-
#ifndef HSA_RUNTIME_INC_HSA_H_
58-
typedef struct hsa_signal_s {
59-
uint64_t handle;
60-
} hsa_signal_t;
61-
#endif
62-
63-
}
64-
65-
/* ---------------------------------------------------------------------------------
66-
* Simulated CPU Data Structures and API
67-
* ---------------------------------------------------------------------------------
68-
*/
69-
70-
#define ATMI_WAIT_STATE HSA_WAIT_STATE_BLOCKED
71-
7249
// ---------------------- Kernel Start -------------
7350
typedef struct atl_kernel_info_s {
7451
uint64_t kernel_object;
@@ -110,7 +87,7 @@ struct SignalPoolT {
11087
state.pop();
11188
hsa_status_t rc = hsa_signal_destroy(signal);
11289
if (rc != HSA_STATUS_SUCCESS) {
113-
DEBUG_PRINT("Signal pool destruction failed\n");
90+
DP("Signal pool destruction failed\n");
11491
}
11592
}
11693
}
@@ -183,6 +160,10 @@ bool handle_group_signal(hsa_signal_value_t value, void *arg);
183160
hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
184161
} // namespace core
185162

186-
const char *get_error_string(hsa_status_t err);
163+
inline const char *get_error_string(hsa_status_t err) {
164+
const char *res;
165+
hsa_status_t rc = hsa_status_string(err, &res);
166+
return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN.";
167+
}
187168

188169
#endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_

openmp/libomptarget/plugins/amdgpu/impl/rt.h

Lines changed: 16 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,74 +8,26 @@
88
#ifndef SRC_RUNTIME_INCLUDE_RT_H_
99
#define SRC_RUNTIME_INCLUDE_RT_H_
1010

11-
#include "impl_runtime.h"
1211
#include "hsa_api.h"
12+
#include "impl_runtime.h"
13+
#include "internal.h"
14+
1315
#include <string>
1416

1517
namespace core {
16-
17-
#define DEFAULT_MAX_QUEUE_SIZE 4096
18-
#define DEFAULT_DEBUG_MODE 0
19-
class Environment {
20-
public:
21-
Environment()
22-
: max_queue_size_(DEFAULT_MAX_QUEUE_SIZE),
23-
debug_mode_(DEFAULT_DEBUG_MODE) {
24-
GetEnvAll();
25-
}
26-
27-
void GetEnvAll();
28-
29-
int getMaxQueueSize() const { return max_queue_size_; }
30-
int getDebugMode() const { return debug_mode_; }
31-
32-
private:
33-
std::string GetEnv(const char *name) {
34-
char *env = getenv(name);
35-
std::string ret;
36-
if (env) {
37-
ret = env;
38-
}
39-
return ret;
40-
}
41-
42-
int max_queue_size_;
43-
int debug_mode_;
44-
};
45-
46-
class Runtime final {
47-
public:
48-
static Runtime &getInstance() {
49-
static Runtime instance;
50-
return instance;
51-
}
52-
53-
// modules
54-
static hsa_status_t RegisterModuleFromMemory(
55-
void *, size_t, hsa_agent_t agent,
56-
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
57-
void *cb_state),
58-
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
59-
60-
// data
61-
static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
62-
static hsa_status_t Memfree(void *);
63-
static hsa_status_t HostMalloc(void **ptr, size_t size,
64-
hsa_amd_memory_pool_t MemoryPool);
65-
66-
int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
67-
int getDebugMode() const { return env_.getDebugMode(); }
68-
69-
protected:
70-
Runtime() = default;
71-
~Runtime() = default;
72-
Runtime(const Runtime &) = delete;
73-
Runtime &operator=(const Runtime &) = delete;
74-
75-
protected:
76-
// variable to track environment variables
77-
Environment env_;
78-
};
18+
namespace Runtime {
19+
hsa_status_t Memfree(void *);
20+
hsa_status_t HostMalloc(void **ptr, size_t size,
21+
hsa_amd_memory_pool_t MemoryPool);
22+
23+
} // namespace Runtime
24+
hsa_status_t RegisterModuleFromMemory(
25+
std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
26+
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
27+
void *module_bytes, size_t module_size, hsa_agent_t agent,
28+
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
29+
void *cb_state),
30+
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
7931

8032
} // namespace core
8133

openmp/libomptarget/plugins/amdgpu/impl/system.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,8 @@ static hsa_status_t get_code_object_custom_metadata(
525525
size_t padding = new_offset - offset;
526526
offset = new_offset;
527527
info.arg_offsets.push_back(lcArg.offset_);
528-
DEBUG_PRINT("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(),
529-
lcArg.size_, lcArg.offset_);
528+
DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
529+
lcArg.offset_);
530530
offset += lcArg.size_;
531531

532532
// check if the arg is a hidden/implicit arg
@@ -541,13 +541,13 @@ static hsa_status_t get_code_object_custom_metadata(
541541
}
542542

543543
// add size of implicit args, e.g.: offset x, y and z and pipe pointer, but
544-
// in ATMI, do not count the compiler set implicit args, but set your own
545-
// implicit args by discounting the compiler set implicit args
544+
// do not count the compiler set implicit args, but set your own implicit
545+
// args by discounting the compiler set implicit args
546546
info.kernel_segment_size =
547547
(hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size) +
548548
sizeof(impl_implicit_args_t);
549-
DEBUG_PRINT("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
550-
kernel_segment_size, info.kernel_segment_size);
549+
DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
550+
kernel_segment_size, info.kernel_segment_size);
551551

552552
// kernel received, now add it to the kernel info table
553553
KernelInfoTable[kernelName] = info;
@@ -571,7 +571,7 @@ populate_InfoTables(hsa_executable_symbol_t symbol,
571571
"Symbol info extraction", get_error_string(err));
572572
return err;
573573
}
574-
DEBUG_PRINT("Exec Symbol type: %d\n", type);
574+
DP("Exec Symbol type: %d\n", type);
575575
if (type == HSA_SYMBOL_KIND_KERNEL) {
576576
err = hsa_executable_symbol_get_info(
577577
symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length);
@@ -636,11 +636,10 @@ populate_InfoTables(hsa_executable_symbol_t symbol,
636636
return err;
637637
}
638638

639-
DEBUG_PRINT(
640-
"Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
641-
"kernarg\n",
642-
kernelName.c_str(), info.kernel_object, info.group_segment_size,
643-
info.private_segment_size, info.kernel_segment_size);
639+
DP("Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
640+
"kernarg\n",
641+
kernelName.c_str(), info.kernel_object, info.group_segment_size,
642+
info.private_segment_size, info.kernel_segment_size);
644643

645644
// assign it back to the kernel info table
646645
KernelInfoTable[kernelName] = info;
@@ -681,12 +680,11 @@ populate_InfoTables(hsa_executable_symbol_t symbol,
681680
return err;
682681
}
683682

684-
DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr,
685-
info.size);
683+
DP("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr, info.size);
686684
SymbolInfoTable[std::string(name)] = info;
687685
free(name);
688686
} else {
689-
DEBUG_PRINT("Symbol is an indirect function\n");
687+
DP("Symbol is an indirect function\n");
690688
}
691689
return HSA_STATUS_SUCCESS;
692690
}
@@ -730,9 +728,8 @@ hsa_status_t RegisterModuleFromMemory(
730728
err = get_code_object_custom_metadata(module_bytes, module_size,
731729
KernelInfoTable);
732730
if (err != HSA_STATUS_SUCCESS) {
733-
DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
734-
"Getting custom code object metadata",
735-
get_error_string(err));
731+
DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
732+
"Getting custom code object metadata", get_error_string(err));
736733
continue;
737734
}
738735

@@ -741,8 +738,8 @@ hsa_status_t RegisterModuleFromMemory(
741738
err = hsa_code_object_deserialize(module_bytes, module_size, NULL,
742739
&code_object);
743740
if (err != HSA_STATUS_SUCCESS) {
744-
DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
745-
"Code Object Deserialization", get_error_string(err));
741+
DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
742+
"Code Object Deserialization", get_error_string(err));
746743
continue;
747744
}
748745
assert(0 != code_object.handle);
@@ -763,16 +760,16 @@ hsa_status_t RegisterModuleFromMemory(
763760
err =
764761
hsa_executable_load_code_object(executable, agent, code_object, NULL);
765762
if (err != HSA_STATUS_SUCCESS) {
766-
DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
767-
"Loading the code object", get_error_string(err));
763+
DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
764+
"Loading the code object", get_error_string(err));
768765
continue;
769766
}
770767

771768
// cannot iterate over symbols until executable is frozen
772769
}
773770
module_load_success = true;
774771
} while (0);
775-
DEBUG_PRINT("Modules loaded successful? %d\n", module_load_success);
772+
DP("Modules loaded successful? %d\n", module_load_success);
776773
if (module_load_success) {
777774
/* Freeze the executable; it can now be queried for symbols. */
778775
err = hsa_executable_freeze(executable, "");

openmp/libomptarget/plugins/amdgpu/impl/utils.cpp

Lines changed: 0 additions & 39 deletions
This file was deleted.

0 commit comments

Comments
 (0)