Skip to content

Commit 6ec8da4

Browse files
authored
[SYCLomatic][PTX] Support migration of PTX instruction cvt.rn.f16x2.f32 (#2735)
Signed-off-by: chenwei.sun <chenwei.sun@intel.com>
1 parent 2039135 commit 6ec8da4

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

clang/lib/DPCT/RulesAsm/AsmMigration.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2428,7 +2428,44 @@ class SYCLGen : public SYCLGenBase {
24282428
}
24292429
}
24302430

2431+
bool HandleCvtVec(const InlineAsmInstruction *Inst) {
2432+
2433+
if (emitStmt(Inst->getOutputOperand()))
2434+
return SYCLGenError();
2435+
std::string Op;
2436+
if (tryEmitStmt(Op, Inst->getInputOperand(0)))
2437+
return SYCLGenError();
2438+
OS() << " = ";
2439+
std::string FormatTemp =
2440+
"(sycl::ushort2(sycl::vec<float, 1>({0}).convert<sycl::half, "
2441+
"sycl::rounding_mode::rte>().as<sycl::vec<uint16_t, 1>>().x(),"
2442+
"sycl::vec<float, 1>({1}).convert<sycl::half, "
2443+
"sycl::rounding_mode::rte>().as<sycl::vec<uint16_t, 1>>().x()))"
2444+
".as<sycl::vec<int, 1>>().x()";
2445+
2446+
std::string InputOp[2];
2447+
for (unsigned I = 0; I < Inst->getNumInputOperands(); ++I) {
2448+
if (tryEmitStmt(InputOp[I], Inst->getInputOperand(I)))
2449+
return SYCLGenError();
2450+
if (Inst->hasAttr(InstAttr::sat))
2451+
InputOp[I] = Cast(Inst->getType(0), Inst->getInputOperand(I)->getType(),
2452+
InputOp[I]);
2453+
}
2454+
2455+
OS() << llvm::formatv(FormatTemp.c_str(), InputOp[1], InputOp[0]);
2456+
2457+
endstmt();
2458+
return SYCLGenSuccess();
2459+
}
2460+
24312461
bool handle_cvt(const InlineAsmInstruction *Inst) override {
2462+
2463+
if (Inst->getNumInputOperands() == 2 && Inst->getNumTypes() == 2 &&
2464+
isa<InlineAsmBuiltinType>(Inst->getType(0)) &&
2465+
isa<InlineAsmBuiltinType>(Inst->getType(1))) {
2466+
return HandleCvtVec(Inst);
2467+
}
2468+
24322469
if (Inst->getNumInputOperands() != 1 || Inst->getNumTypes() != 2 ||
24332470
!isa<InlineAsmBuiltinType>(Inst->getType(0)) ||
24342471
!isa<InlineAsmBuiltinType>(Inst->getType(1)))

clang/test/dpct/asm/cvt.cu

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,4 +400,21 @@ __global__ void cvt() {
400400
}
401401
#endif
402402

403+
// CHECK: inline uint32_t float2_to_half2(sycl::float2 f) {
404+
// CHECK-NEXT: union {
405+
// CHECK-NEXT: uint32_t u32;
406+
// CHECK-NEXT: uint16_t u16[2];
407+
// CHECK-NEXT: } tmp;
408+
// CHECK-NEXT: tmp.u32 = (sycl::ushort2(sycl::vec<float, 1>(f.x()).convert<sycl::half, sycl::rounding_mode::rte>().as<sycl::vec<uint16_t, 1>>().x(),sycl::vec<float, 1>(f.y()).convert<sycl::half, sycl::rounding_mode::rte>().as<sycl::vec<uint16_t, 1>>().x())).as<sycl::vec<int, 1>>().x();
409+
// CHECK-NEXT: return tmp.u32;
410+
// CHECK-NEXT: }
411+
inline __device__ uint32_t float2_to_half2(float2 f) {
412+
union {
413+
uint32_t u32;
414+
uint16_t u16[2];
415+
} tmp;
416+
asm volatile("cvt.rn.f16x2.f32 %0, %1, %2;\n" : "=r"(tmp.u32) : "f"(f.y), "f"(f.x));
417+
return tmp.u32;
418+
}
419+
403420
// clang-format on

0 commit comments

Comments
 (0)