Skip to content

Commit b4534dd

Browse files
authored
Merge pull request llvm#490 from AMD-Lightning-Internal/amd/dev/rlieberm/jhuber-preland-123673
preland 123673 [OpenMP] Port the OpenMP device runtime to direct C++ compilation
2 parents b1c8b75 + f120a43 commit b4534dd

31 files changed

+56
-245
lines changed

clang/lib/Headers/__clang_hip_math.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,11 @@
2121
#include <limits.h>
2222
#include <stdint.h>
2323
#ifdef __OPENMP_AMDGCN__
24+
// FIXME: A hack for the OpenMP DeviceRTL's `LibM.h` that should be removed.
25+
#ifndef __OPENMP_SKIP_INCLUDE__
2426
#include <omp.h>
2527
#endif
28+
#endif
2629
#endif // !defined(__HIPCC_RTC__)
2730

2831
// __DEVICE__ is a helper macro with common set of attributes for the wrappers

offload/DeviceRTL/CMakeLists.txt

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ set(src_files
9090
# propagation. That said, we will run the vectorizer again after the runtime
9191
# has been linked into the user program.
9292
set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false )
93-
set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
93+
set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
9494
set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
9595

9696
# If the user built with the GPU C library enabled we will use that instead.
@@ -103,11 +103,10 @@ set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
103103
list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
104104

105105
# Set flags for LLVM Bitcode compilation.
106-
set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
107-
${clang_opt_flags} --offload-device-only
108-
-nocudalib -nogpulib -nogpuinc
109-
-fopenmp -fopenmp-cuda-mode
110-
-Wno-unknown-cuda-version -Wno-openmp-target
106+
set(bc_flags -c -flto -std=c++17 -fvisibility=hidden
107+
${clang_opt_flags} -nocudalib -nogpulib
108+
-nogpuinc -nostdlibinc -Wno-unknown-cuda-version
109+
-fno-rtti -fno-exceptions -fconvergent-functions
111110
-I${CMAKE_BINARY_DIR}/openmp/runtime/src # Need omp.h for LibM.
112111
-I${CMAKE_BINARY_DIR}/projects/openmp/runtime/src # Need omp.h for LibM.
113112
-I${CMAKE_BINARY_DIR}/runtime/src
@@ -139,9 +138,8 @@ function(compileDeviceRTLLibrary target_name target_triple)
139138
add_custom_target(${outfile}
140139
COMMAND ${CLANG_TOOL}
141140
${bc_flags}
142-
-fopenmp-targets=${target_triple}
143-
-Xopenmp-target=${target_triple} -march=
144141
${target_bc_flags}
142+
--target=${target_triple}
145143
-MD -MF ${depfile}
146144
${infile} -o ${outfile}
147145
DEPENDS ${infile} ${include_files}
@@ -310,10 +308,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
310308
set(ide_target_name omptarget-ide-${target_name})
311309
add_library(${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files})
312310
target_compile_options(${ide_target_name} PRIVATE
313-
-fopenmp -fopenmp-cuda-mode
314-
-fopenmp-targets=${target_triple} -Xopenmp-target=${target_triple} -march=
315-
-mllvm -openmp-opt-disable
316-
-foffload-lto -fvisibility=hidden --offload-device-only
311+
--target=${target_triple} -flto -fvisibility=hidden
317312
-nocudalib -nogpulib -nogpuinc -nostdlibinc -Wno-unknown-cuda-version
318313
)
319314
target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512)

offload/DeviceRTL/include/Allocator.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717
// Forward declaration.
1818
struct KernelEnvironmentTy;
1919

20-
#pragma omp begin declare target device_type(nohost)
21-
2220
namespace ompx {
2321

2422
namespace allocator {
@@ -44,6 +42,4 @@ extern "C" {
4442
[[gnu::weak]] void free(void *Ptr);
4543
}
4644

47-
#pragma omp end declare target
48-
4945
#endif

offload/DeviceRTL/include/DevRTLExtras.h

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,7 @@
1313
#ifndef OPENMP_LIBOMPTARGET_DEVICERTL_INCLUDE_DEVRTLEXTRAS_H
1414
#define OPENMP_LIBOMPTARGET_DEVICERTL_INCLUDE_DEVRTLEXTRAS_H
1515

16-
/// Base type declarations for freestanding mode
17-
///
18-
///{
19-
using uint64_t = unsigned long;
20-
// TODO: Properly implement this
21-
using uintptr_t = uint64_t;
22-
///}
16+
#include <stdint.h>
2317

2418
/// Macros for allocating variables in different address spaces.
2519
///{

offload/DeviceRTL/include/DeviceTypes.h

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -100,14 +100,7 @@ struct TaskDescriptorTy {
100100
TaskFnTy TaskFn;
101101
};
102102

103-
#pragma omp begin declare variant match(device = {arch(amdgcn)})
104103
using LaneMaskTy = uint64_t;
105-
#pragma omp end declare variant
106-
107-
#pragma omp begin declare variant match( \
108-
device = {arch(amdgcn)}, implementation = {extension(match_none)})
109-
using LaneMaskTy = uint64_t;
110-
#pragma omp end declare variant
111104

112105
namespace lanes {
113106
enum : LaneMaskTy { All = ~(LaneMaskTy)0 };
@@ -164,8 +157,7 @@ typedef enum omp_allocator_handle_t {
164157
#define OMP_PRAGMA(STR) __PRAGMA(omp STR)
165158

166159
#define SHARED(NAME) \
167-
NAME [[clang::loader_uninitialized]]; \
168-
OMP_PRAGMA(allocate(NAME) allocator(omp_pteam_mem_alloc))
160+
[[clang::address_space(3)]] NAME [[clang::loader_uninitialized]];
169161

170162
// TODO: clang should use address space 5 for omp_thread_mem_alloc, but right
171163
// now that's not the case.

offload/DeviceRTL/include/DeviceUtils.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@
1515
#include "DeviceTypes.h"
1616
#include "Shared/Utils.h"
1717

18-
#pragma omp begin declare target device_type(nohost)
19-
2018
namespace utils {
2119

2220
template <typename T> struct type_identity {
@@ -95,6 +93,4 @@ bool isThreadLocalMemPtr(void *Ptr);
9593

9694
} // namespace utils
9795

98-
#pragma omp end declare target
99-
10096
#endif

offload/DeviceRTL/include/Mapping.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,8 @@ enum {
2424
DIM_Z = 2,
2525
};
2626

27-
#pragma omp begin declare target device_type(nohost)
28-
2927
inline constexpr uint32_t MaxThreadsPerTeam = 1024;
3028

31-
#pragma omp end declare target
32-
3329
/// Initialize the mapping machinery.
3430
void init(bool IsSPMD);
3531

offload/DeviceRTL/include/State.h

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,6 @@
2323
// Forward declaration.
2424
struct KernelEnvironmentTy;
2525

26-
#pragma omp begin declare target device_type(nohost)
27-
2826
namespace ompx {
2927

3028
namespace memory {
@@ -89,8 +87,7 @@ struct TeamStateTy {
8987
ParallelRegionFnTy ParallelRegionFnVar;
9088
};
9189

92-
extern TeamStateTy TeamState;
93-
#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
90+
extern TeamStateTy [[clang::address_space(3)]] TeamState;
9491

9592
struct ThreadStateTy {
9693

@@ -116,8 +113,7 @@ struct ThreadStateTy {
116113
}
117114
};
118115

119-
extern ThreadStateTy **ThreadStates;
120-
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
116+
extern ThreadStateTy **[[clang::address_space(3)]] ThreadStates;
121117

122118
/// Initialize the state machinery. Must be called by all threads.
123119
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
@@ -379,6 +375,4 @@ inline state::Value<uint32_t, state::VK_RunSched> RunSched;
379375

380376
} // namespace ompx
381377

382-
#pragma omp end declare target
383-
384378
#endif

offload/DeviceRTL/include/Synchronization.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@
1515
#include "DeviceTypes.h"
1616
#include "DeviceUtils.h"
1717

18-
#pragma omp begin declare target device_type(nohost)
19-
2018
namespace ompx {
2119
namespace atomic {
2220

@@ -221,6 +219,4 @@ void system(atomic::OrderingTy Ordering);
221219

222220
} // namespace ompx
223221

224-
#pragma omp end declare target
225-
226222
#endif

offload/DeviceRTL/include/Workshare.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,6 @@
1212
#ifndef OMPTARGET_WORKSHARE_H
1313
#define OMPTARGET_WORKSHARE_H
1414

15-
#pragma omp begin declare target device_type(nohost)
16-
1715
namespace ompx {
1816

1917
namespace workshare {
@@ -25,6 +23,4 @@ void init(bool IsSPMD);
2523

2624
} // namespace ompx
2725

28-
#pragma omp end declare target
29-
3026
#endif

0 commit comments

Comments
 (0)