Skip to content

Commit 9893f74

Browse files
authored
[SYCLomatic][PTX] Support migration of PTX instruction neg.f16x2 (#2775)
Signed-off-by: chenwei.sun <chenwei.sun@intel.com>
1 parent 7b4200e commit 9893f74

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

clang/lib/DPCT/RulesAsm/AsmMigration.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1575,6 +1575,7 @@ class SYCLGen : public SYCLGenBase {
15751575

15761576
if (Type->getKind() != InlineAsmBuiltinType::s16 &&
15771577
Type->getKind() != InlineAsmBuiltinType::s32 &&
1578+
Type->getKind() != InlineAsmBuiltinType::f16x2 &&
15781579
Type->getKind() != InlineAsmBuiltinType::s64) {
15791580
return SYCLGenError();
15801581
}
@@ -1594,7 +1595,14 @@ class SYCLGen : public SYCLGenBase {
15941595

15951596
if (Inst->is(asmtok::op_abs))
15961597
OS() << MapNames::getClNamespace() << "abs(" << Op << ")";
1597-
else
1598+
else if (Inst->is(asmtok::op_neg) &&
1599+
Type->getKind() == InlineAsmBuiltinType::f16x2) {
1600+
std::string FormatTemp =
1601+
"(-sycl::vec<int, 1>({0}).as<sycl::vec<sycl::half, "
1602+
"2>>()).as<sycl::vec<int, 1>>().x()";
1603+
1604+
OS() << llvm::formatv(FormatTemp.c_str(), Op);
1605+
} else
15981606
OS() << "-" << Op;
15991607

16001608
endstmt();

clang/test/dpct/asm/neg.cu

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
// clang-format off
88
#include <cuda_runtime.h>
9+
#include <cuda_fp16.h>
910
#include <cstdint>
1011

1112
__global__ void neg() {
@@ -24,4 +25,17 @@ __global__ void neg() {
2425
asm("neg.s64 %0, %1;" : "=r"(i64) : "r"(x));
2526
}
2627

28+
// CHECK: inline void negate_half2(sycl::half2 *addr) {
29+
// CHECK-NEXT: unsigned reg[2];
30+
// CHECK-NEXT: reg[0] = *reinterpret_cast<unsigned int*>(addr);
31+
// CHECK-NEXT: reg[0] = (-sycl::vec<int, 1>(reg[0]).as<sycl::vec<sycl::half, 2>>()).as<sycl::vec<int, 1>>().x();
32+
// CHECK-NEXT: *reinterpret_cast<unsigned int*>(addr) = reg[0];
33+
// CHECK-NEXT:}
34+
__device__ inline void negate_half2(__half2 *addr) {
35+
unsigned reg[2];
36+
reg[0] = *reinterpret_cast<unsigned int*>(addr);
37+
asm volatile("neg.f16x2 %0, %0;" : "+r"(reg[0]));
38+
*reinterpret_cast<unsigned int*>(addr) = reg[0];
39+
}
40+
2741
// clang-format on

0 commit comments

Comments
 (0)