Skip to content

Commit 902b8fd

Browse files
authored
Replace hostexec with upstream rpc (llvm#802)
2 parents 845edee + 4dda512 commit 902b8fd

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+2462
-5186
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6452,9 +6452,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
64526452
getTarget().getTriple().isSPIRV()) &&
64536453
getLangOpts().HIP)
64546454
return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
6455-
else if (getLangOpts().OpenMP)
6456-
return EmitHostexecAllocAndExecFns(E, "printf_allocate",
6457-
"printf_execute");
64586455
}
64596456
break;
64606457
case Builtin::BI__builtin_canonicalize:

clang/lib/CodeGen/CGEmitEmissaryExec.cpp

Lines changed: 387 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6158,17 +6158,20 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
61586158
StaticOperator = true;
61596159
}
61606160

6161-
// GPUs can execute hostexec variadic functions, printf, and fprintf on host.
6162-
if ((CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()) &&
6163-
CGM.getLangOpts().OpenMP && FnType &&
6161+
// Emit __llvm_omp_emissary_rpc for stubs of emissary APIs.
6162+
if ((CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()) && FnType &&
61646163
dyn_cast<FunctionProtoType>(FnType) &&
6165-
dyn_cast<FunctionProtoType>(FnType)->isVariadic() &&
6166-
(std::find(std::begin(HostexecFns), std::end(HostexecFns),
6167-
E->getDirectCallee()->getNameAsString()) !=
6168-
std::end(HostexecFns)))
6169-
return EmitHostexecAllocAndExecFns(
6170-
E, E->getDirectCallee()->getNameAsString().append("_allocate").c_str(),
6171-
E->getDirectCallee()->getNameAsString().append("_execute").c_str());
6164+
dyn_cast<FunctionProtoType>(FnType)->isVariadic()) {
6165+
// This is a variadic function in a device compile
6166+
// if (_emissary_exec || (openmp && (fprintf || printf)))
6167+
if ((E->getDirectCallee()->getNameAsString() == "_emissary_exec") ||
6168+
// FIXME: do not call for fprintf or printf if device libc is active
6169+
(CGM.getLangOpts().OpenMP &&
6170+
((E->getDirectCallee()->getNameAsString() == "fprintf") ||
6171+
(E->getDirectCallee()->getNameAsString() == "printf")))) {
6172+
return EmitEmissaryExec(E);
6173+
}
6174+
}
61726175

61736176
auto Arguments = E->arguments();
61746177
if (StaticOperator) {

clang/lib/CodeGen/CGGPUBuiltin.cpp

Lines changed: 0 additions & 358 deletions
Large diffs are not rendered by default.

clang/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ add_clang_library(clangCodeGen
6161
CGAtomic.cpp
6262
CGBlocks.cpp
6363
CGBuiltin.cpp
64+
CGEmitEmissaryExec.cpp
6465
CGCUDANV.cpp
6566
CGCUDARuntime.cpp
6667
CGCXX.cpp

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4727,13 +4727,7 @@ class CodeGenFunction : public CodeGenTypeCache {
47274727
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
47284728
ReturnValueSlot ReturnValue);
47294729

4730-
std::vector<std::string> HostexecFns{
4731-
"printf", "fprintf", "hostexec", "hostexec_uint",
4732-
"hostexec_uint64", "hostexec_int", "hostexec_long", "hostexec_float",
4733-
"hostexec_double", "hostexec_fortrt"};
4734-
RValue EmitHostexecAllocAndExecFns(const CallExpr *E,
4735-
const char *allocate_name,
4736-
const char *execute_name);
4730+
RValue EmitEmissaryExec(const CallExpr *E);
47374731

47384732
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
47394733
const CallExpr *E, ReturnValueSlot ReturnValue);

clang/test/OpenMP/allow-kernelc-io.c

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int main(void) {
3131
}
3232

3333

34-
// CHECK-NOPE-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25(
34+
// CHECK-NOPE-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25(
3535
// CHECK-NOPE-SAME: ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
3636
// CHECK-NOPE-NEXT: entry:
3737
// CHECK-NOPE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -41,27 +41,31 @@ int main(void) {
4141
// CHECK-NOPE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
4242
// CHECK-NOPE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4343
// CHECK-NOPE: user_code.entry:
44-
// CHECK-NOPE-NEXT: [[TMP1:%.*]] = call ptr @printf_allocate(i32 27)
44+
// CHECK-NOPE-NEXT: [[TMP1:%.*]] = call ptr @__llvm_omp_emissary_premalloc(i32 39)
4545
// CHECK-NOPE-NEXT: [[VARFN_ARGS_STORE_CASTED:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(1)
4646
// CHECK-NOPE-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE:%.*]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 0
47-
// CHECK-NOPE-NEXT: store i32 16, ptr addrspace(1) [[TMP2]], align 4
47+
// CHECK-NOPE-NEXT: store i32 28, ptr addrspace(1) [[TMP2]], align 4
4848
// CHECK-NOPE-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 1
49-
// CHECK-NOPE-NEXT: store i32 1, ptr addrspace(1) [[TMP3]], align 4
49+
// CHECK-NOPE-NEXT: store i32 2, ptr addrspace(1) [[TMP3]], align 4
5050
// CHECK-NOPE-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 2
51-
// CHECK-NOPE-NEXT: store i32 917505, ptr addrspace(1) [[TMP4]], align 4
51+
// CHECK-NOPE-NEXT: store i32 786496, ptr addrspace(1) [[TMP4]], align 4
5252
// CHECK-NOPE-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 3
53-
// CHECK-NOPE-NEXT: store i32 11, ptr addrspace(1) [[TMP5]], align 4
54-
// CHECK-NOPE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i64 16
55-
// CHECK-NOPE-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 1 [[TMP6]], ptr align 1 addrspacecast (ptr addrspace(4) @.str to ptr), i64 11, i1 false)
56-
// CHECK-NOPE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 11
57-
// CHECK-NOPE-NEXT: [[TMP8:%.*]] = call i32 @printf_execute(ptr [[TMP1]], i32 27)
53+
// CHECK-NOPE-NEXT: store i32 917505, ptr addrspace(1) [[TMP5]], align 4
54+
// CHECK-NOPE-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 4
55+
// CHECK-NOPE-NEXT: store i64 8589934593, ptr addrspace(1) [[TMP6:%.*]], align 8
56+
// CHECK-NOPE-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 5
57+
// CHECK-NOPE-NEXT: store i32 11, ptr addrspace(1) [[TMP7:%.*]], align 4
58+
// CHECK-NOPE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i64 28
59+
// CHECK-NOPE-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 1 [[TMP8]], ptr align 1 addrspacecast (ptr addrspace(4) @.str to ptr), i64 11, i1 false)
60+
// CHECK-NOPE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP8]], i64 11
61+
// CHECK-NOPE-NEXT: [[TMP10:%.*]] = call i64 @__llvm_omp_emissary_rpc(i64 8589934593, ptr [[TMP1]])
5862
// CHECK-NOPE-NEXT: call void @__kmpc_target_deinit()
5963
// CHECK-NOPE-NEXT: ret void
6064
// CHECK-NOPE: worker.exit:
6165
// CHECK-NOPE-NEXT: ret void
6266
//
6367
//
64-
// CHECK-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25(
68+
// CHECK-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25(
6569
// CHECK-SAME: ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
6670
// CHECK-NEXT: entry:
6771
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -71,20 +75,24 @@ int main(void) {
7175
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
7276
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
7377
// CHECK: user_code.entry:
74-
// CHECK-NEXT: [[TMP1:%.*]] = call ptr @printf_allocate(i32 27)
78+
// CHECK-NEXT: [[TMP1:%.*]] = call ptr @__llvm_omp_emissary_premalloc(i32 39)
7579
// CHECK-NEXT: [[VARFN_ARGS_STORE_CASTED:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(1)
7680
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE:%.*]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 0
77-
// CHECK-NEXT: store i32 16, ptr addrspace(1) [[TMP2]], align 4
81+
// CHECK-NEXT: store i32 28, ptr addrspace(1) [[TMP2]], align 4
7882
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 1
79-
// CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP3]], align 4
83+
// CHECK-NEXT: store i32 2, ptr addrspace(1) [[TMP3]], align 4
8084
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 2
81-
// CHECK-NEXT: store i32 917505, ptr addrspace(1) [[TMP4]], align 4
85+
// CHECK-NEXT: store i32 786496, ptr addrspace(1) [[TMP4]], align 4
8286
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 3
83-
// CHECK-NEXT: store i32 11, ptr addrspace(1) [[TMP5]], align 4
84-
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i64 16
85-
// CHECK-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 1 [[TMP6]], ptr align 1 addrspacecast (ptr addrspace(4) @.str to ptr), i64 11, i1 false)
86-
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 11
87-
// CHECK-NEXT: [[TMP8:%.*]] = call i32 @printf_execute(ptr [[TMP1]], i32 27)
87+
// CHECK-NEXT: store i32 917505, ptr addrspace(1) [[TMP5]], align 4
88+
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 4
89+
// CHECK-NEXT: store i64 8589934593, ptr addrspace(1) [[TMP6:%.*]], align 8
90+
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[VARFN_ARGS_STORE]], ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i32 0, i32 5
91+
// CHECK-NEXT: store i32 11, ptr addrspace(1) [[TMP7:%.*]], align 4
92+
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[VARFN_ARGS_STORE_CASTED]], i64 28
93+
// CHECK-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 1 [[TMP8]], ptr align 1 addrspacecast (ptr addrspace(4) @.str to ptr), i64 11, i1 false)
94+
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP8]], i64 11
95+
// CHECK-NEXT: [[TMP10:%.*]] = call i64 @__llvm_omp_emissary_rpc(i64 8589934593, ptr [[TMP1]])
8896
// CHECK-NEXT: call void @__kmpc_target_deinit()
8997
// CHECK-NEXT: ret void
9098
// CHECK: worker.exit:

flang/module/f90deviceio.f90

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
! f90print f90printi f90printl f90printf f90printd interfaces
2+
! in module file f90deviceio
3+
module f90deviceio
4+
interface
5+
subroutine f90print(N)
6+
character(*) :: N
7+
!$omp declare target (f90print)
8+
end subroutine f90print
9+
subroutine f90printi(N,i)
10+
character(*) :: N
11+
integer :: i
12+
!$omp declare target (f90printi)
13+
end subroutine f90printi
14+
subroutine f90printl(N,i)
15+
character(*) :: N
16+
integer(8) :: i
17+
!$omp declare target (f90printl)
18+
end subroutine f90printl
19+
subroutine f90printf(N,f)
20+
character(*) :: N
21+
real(4) :: f
22+
!$omp declare target (f90printf)
23+
end subroutine f90printf
24+
subroutine f90printd(N,d)
25+
character(*) :: N
26+
real(8) :: d
27+
!$omp declare target (f90printd)
28+
end subroutine f90printd
29+
end interface
30+
end module
31+

flang/tools/f18/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set(MODULES_WITHOUT_IMPLEMENTATION
2828
"ieee_features"
2929
"iso_c_binding"
3030
"iso_fortran_env"
31+
"f90deviceio"
3132
)
3233

3334
set(MODULES ${MODULES_WITH_IMPLEMENTATION} ${MODULES_WITHOUT_IMPLEMENTATION})

llvm/runtimes/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ if(build_runtimes)
579579
if (${LIBOMPTARGET_BUILD_DEVICE_FORTRT})
580580
set(FORTRT_DEP FortranRuntime)
581581
endif()
582-
foreach(dep opt llvm-link llvm-extract clang clang-offload-packager rocm-device-libs offload-arch amdgcn_hostexec_services ${HSA_DEP} ${AMDGPU_ARCH_DEP} ${FORTRT_DEP})
582+
foreach(dep opt llvm-link llvm-extract clang clang-offload-packager rocm-device-libs offload-arch ${HSA_DEP} ${AMDGPU_ARCH_DEP} ${FORTRT_DEP})
583583
if(TARGET ${dep} AND OPENMP_ENABLE_LIBOMPTARGET)
584584
list(APPEND extra_deps ${dep})
585585
endif()

0 commit comments

Comments
 (0)