Skip to content

Commit 89d9a83

Browse files
authored
[flang][cuda] Use NVVM op for barrier0 intrinsic (#140947)
The simple form of `Barrier0Op` is available in the NVVM dialect. Since #140615, it needs to be used instead of the string-named call to `llvm.nvvm.barrier0`.
1 parent 5ba57a8 commit 89d9a83

File tree

2 files changed

+3
-8
lines changed

2 files changed

+3
-8
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8332,12 +8332,7 @@ IntrinsicLibrary::genSum(mlir::Type resultType,
83328332

83338333
// SYNCTHREADS
83348334
void IntrinsicLibrary::genSyncThreads(llvm::ArrayRef<fir::ExtendedValue> args) {
8335-
constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0";
8336-
mlir::FunctionType funcType =
8337-
mlir::FunctionType::get(builder.getContext(), {}, {});
8338-
auto funcOp = builder.createFunction(loc, funcName, funcType);
8339-
llvm::SmallVector<mlir::Value> noArgs;
8340-
builder.create<fir::CallOp>(loc, funcOp, noArgs);
8335+
builder.create<mlir::NVVM::Barrier0Op>(loc);
83418336
}
83428337

83438338
// SYNCTHREADS_AND

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ attributes(global) subroutine devsub()
4949
end
5050

5151
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
52-
! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> ()
52+
! CHECK: nvvm.barrier0
5353
! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
5454
! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
5555
! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
@@ -106,7 +106,7 @@ end
106106

107107
! CHECK-LABEL: func.func @_QPhost1()
108108
! CHECK: cuf.kernel
109-
! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> ()
109+
! CHECK: nvvm.barrier0
110110
! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
111111
! CHECK: fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
112112
! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32

0 commit comments

Comments
 (0)