Skip to content

Commit a7a7e95

Browse files
kosarev and nhaehnle authored
[AMDGPU][Clang] Support bfloat16 arithmetic. (#147541)
Co-authored-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
1 parent ed06de4 commit a7a7e95

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

clang/lib/Basic/Targets/AMDGPU.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -439,6 +439,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
439 439
// pre-defined macros.
440 440
bool handleTargetFeatures(std::vector<std::string> &Features,
441 441
DiagnosticsEngine &Diags) override {
442+
HasFullBFloat16 = true;
442 443
auto TargetIDFeatures =
443 444
getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
444 445
for (const auto &F : Features) {

clang/test/CodeGen/AMDGPU/full-bf16.c

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu tahiti -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu fiji -emit-llvm -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s
6+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
7+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
8+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
9+
10+
// CHECK-LABEL: define dso_local bfloat @div(
11+
// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
12+
// CHECK-NEXT: [[ENTRY:.*:]]
13+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
14+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
15+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
16+
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
17+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
18+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
19+
// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR_ASCAST]], align 2
20+
// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR_ASCAST]], align 2
21+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
22+
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR_ASCAST]], align 2
23+
// CHECK-NEXT: [[DIV:%.*]] = fdiv bfloat [[TMP0]], [[TMP1]]
24+
// CHECK-NEXT: ret bfloat [[DIV]]
25+
//
26+
__bf16 div(__bf16 a, __bf16 b) { // Test input: __bf16 division; the CHECK lines above expect an `fdiv bfloat` on the two loaded operands.
27+
return a / b;
28+
}

0 commit comments

Comments
 (0)