Skip to content

Commit 363d50e

Browse files
committed
Revert "[NVPTX] Aggressively try to replace image handles with references (#119730)"
This reverts commit f9c8c01.
1 parent 54e77bd commit 363d50e

11 files changed

+162
-231
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
209209
TargetMachine &TM = const_cast<TargetMachine &>(MF->getTarget());
210210
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine &>(TM);
211211
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
212-
StringRef Sym = MFI->getImageHandleSymbol(Index);
212+
const char *Sym = MFI->getImageHandleSymbol(Index);
213213
StringRef SymName = nvTM.getStrPool().save(Sym);
214214
MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
215215
}
@@ -224,13 +224,16 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
224224
return;
225225
}
226226

227+
const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
227228
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
228229
const MachineOperand &MO = MI->getOperand(i);
229230

230231
MCOperand MCOp;
231-
if (lowerImageHandleOperand(MI, i, MCOp)) {
232-
OutMI.addOperand(MCOp);
233-
continue;
232+
if (!STI.hasImageHandles()) {
233+
if (lowerImageHandleOperand(MI, i, MCOp)) {
234+
OutMI.addOperand(MCOp);
235+
continue;
236+
}
234237
}
235238

236239
if (lowerOperand(MO, MCOp))
@@ -1506,14 +1509,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
15061509
const AttributeList &PAL = F->getAttributes();
15071510
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
15081511
const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1509-
const NVPTXMachineFunctionInfo *MFI =
1510-
MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
15111512

15121513
Function::const_arg_iterator I, E;
15131514
unsigned paramIndex = 0;
15141515
bool first = true;
15151516
bool isKernelFunc = isKernelFunction(*F);
15161517
bool isABI = (STI.getSmVersion() >= 20);
1518+
bool hasImageHandles = STI.hasImageHandles();
15171519

15181520
if (F->arg_empty() && !F->isVarArg()) {
15191521
O << "()";
@@ -1531,30 +1533,25 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
15311533
first = false;
15321534

15331535
// Handle image/sampler parameters
1534-
if (isKernelFunc) {
1536+
if (isKernelFunction(*F)) {
15351537
if (isSampler(*I) || isImage(*I)) {
1536-
std::string ParamSym;
1537-
raw_string_ostream ParamStr(ParamSym);
1538-
ParamStr << F->getName() << "_param_" << paramIndex;
1539-
ParamStr.flush();
1540-
bool EmitImagePtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
15411538
if (isImage(*I)) {
15421539
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1543-
if (EmitImagePtr)
1540+
if (hasImageHandles)
15441541
O << "\t.param .u64 .ptr .surfref ";
15451542
else
15461543
O << "\t.param .surfref ";
15471544
O << TLI->getParamName(F, paramIndex);
15481545
}
15491546
else { // Default image is read_only
1550-
if (EmitImagePtr)
1547+
if (hasImageHandles)
15511548
O << "\t.param .u64 .ptr .texref ";
15521549
else
15531550
O << "\t.param .texref ";
15541551
O << TLI->getParamName(F, paramIndex);
15551552
}
15561553
} else {
1557-
if (EmitImagePtr)
1554+
if (hasImageHandles)
15581555
O << "\t.param .u64 .ptr .samplerref ";
15591556
else
15601557
O << "\t.param .samplerref ";

llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,13 @@
1414
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
1515
#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
1616

17-
#include "llvm/ADT/StringRef.h"
1817
#include "llvm/CodeGen/MachineFunction.h"
1918

2019
namespace llvm {
2120
class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
2221
private:
23-
/// Stores a mapping from index to symbol name for image handles that are
24-
/// replaced with image references
22+
/// Stores a mapping from index to symbol name for removing image handles
23+
/// on Fermi.
2524
SmallVector<std::string, 8> ImageHandleList;
2625

2726
public:
@@ -37,27 +36,20 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
3736
/// Returns the index for the symbol \p Symbol. If the symbol was previously
3837
/// added, the same index is returned. Otherwise, the symbol is added and the
3938
/// new index is returned.
40-
unsigned getImageHandleSymbolIndex(StringRef Symbol) {
39+
unsigned getImageHandleSymbolIndex(const char *Symbol) {
4140
// Is the symbol already present?
4241
for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
43-
if (ImageHandleList[i] == Symbol)
42+
if (ImageHandleList[i] == std::string(Symbol))
4443
return i;
4544
// Nope, insert it
46-
ImageHandleList.push_back(Symbol.str());
45+
ImageHandleList.push_back(Symbol);
4746
return ImageHandleList.size()-1;
4847
}
4948

5049
/// Returns the symbol name at the given index.
51-
StringRef getImageHandleSymbol(unsigned Idx) const {
50+
const char *getImageHandleSymbol(unsigned Idx) const {
5251
assert(ImageHandleList.size() > Idx && "Bad index");
53-
return ImageHandleList[Idx];
54-
}
55-
56-
/// Check if the symbol has a mapping. Having a mapping means the handle is
57-
/// replaced with a reference
58-
bool checkImageHandleSymbol(StringRef Symbol) const {
59-
return ImageHandleList.end() !=
60-
std::find(ImageHandleList.begin(), ImageHandleList.end(), Symbol);
52+
return ImageHandleList[Idx].c_str();
6153
}
6254
};
6355
}

llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,7 +1830,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
18301830
NewSymStr << MF.getName() << "_param_" << Param;
18311831

18321832
InstrsToRemove.insert(&TexHandleDef);
1833-
Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str());
1833+
Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
18341834
return true;
18351835
}
18361836
case NVPTX::texsurf_handles: {
@@ -1839,7 +1839,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
18391839
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
18401840
assert(GV->hasName() && "Global sampler must be named!");
18411841
InstrsToRemove.insert(&TexHandleDef);
1842-
Idx = MFI->getImageHandleSymbolIndex(GV->getName());
1842+
Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
18431843
return true;
18441844
}
18451845
case NVPTX::nvvm_move_i64:

llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
5656
const std::string &FS,
5757
const NVPTXTargetMachine &TM)
5858
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
59-
FullSmVersion(200), SmVersion(getSmVersion()),
59+
FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
6060
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {
6161
TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
6262
}
@@ -67,6 +67,16 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
6767
return TSInfo.get();
6868
}
6969

70+
bool NVPTXSubtarget::hasImageHandles() const {
71+
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
72+
// textures
73+
if (TM.getDrvInterface() == NVPTX::CUDA)
74+
return (SmVersion >= 30);
75+
76+
// Disabled, otherwise
77+
return false;
78+
}
79+
7080
bool NVPTXSubtarget::allowFP16Math() const {
7181
return hasFP16Math() && NoF16Math == false;
7282
}

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
4242
// FullSmVersion.
4343
unsigned int SmVersion;
4444

45+
const NVPTXTargetMachine &TM;
4546
NVPTXInstrInfo InstrInfo;
4647
NVPTXTargetLowering TLInfo;
4748
std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
@@ -81,6 +82,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
8182
bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
8283
bool hasLDG() const { return SmVersion >= 32; }
8384
bool hasHWROT32() const { return SmVersion >= 32; }
85+
bool hasImageHandles() const;
8486
bool hasFP16Math() const { return SmVersion >= 53; }
8587
bool hasBF16Math() const { return SmVersion >= 80; }
8688
bool allowFP16Math() const;

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,10 +427,14 @@ void NVPTXPassConfig::addIRPasses() {
427427
}
428428

429429
bool NVPTXPassConfig::addInstSelector() {
430+
const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
431+
430432
addPass(createLowerAggrCopies());
431433
addPass(createAllocaHoisting());
432434
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
433-
addPass(createNVPTXReplaceImageHandlesPass());
435+
436+
if (!ST.hasImageHandles())
437+
addPass(createNVPTXReplaceImageHandlesPass());
434438

435439
return false;
436440
}

llvm/test/CodeGen/NVPTX/surf-read-cuda.ll

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
3-
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
1+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
2+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
43
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
54
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}
65

@@ -10,48 +9,38 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
109
declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
1110

1211

12+
; SM20-LABEL: .entry foo
13+
; SM30-LABEL: .entry foo
1314
define void @foo(i64 %img, ptr %red, i32 %idx) {
14-
; CHECK-LABEL: foo(
15-
; CHECK: {
16-
; CHECK-NEXT: .reg .b32 %r<3>;
17-
; CHECK-NEXT: .reg .f32 %f<2>;
18-
; CHECK-NEXT: .reg .b64 %rd<4>;
19-
; CHECK-EMPTY:
20-
; CHECK-NEXT: // %bb.0:
21-
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
22-
; CHECK-NEXT: ld.param.u64 %rd2, [foo_param_1];
23-
; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
24-
; CHECK-NEXT: ld.param.u32 %r1, [foo_param_2];
25-
; CHECK-NEXT: suld.b.1d.b32.trap {%r2}, [%rd1, {%r1}];
26-
; CHECK-NEXT: cvt.rn.f32.s32 %f1, %r2;
27-
; CHECK-NEXT: st.global.f32 [%rd3], %f1;
28-
; CHECK-NEXT: ret;
15+
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
16+
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
17+
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
18+
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
2919
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
20+
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
21+
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
3022
%ret = sitofp i32 %val to float
23+
; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
24+
; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
3125
store float %ret, ptr %red
3226
ret void
3327
}
3428

3529
@surf0 = internal addrspace(1) global i64 0, align 8
3630

31+
; SM20-LABEL: .entry bar
32+
; SM30-LABEL: .entry bar
3733
define void @bar(ptr %red, i32 %idx) {
38-
; CHECK-LABEL: bar(
39-
; CHECK: {
40-
; CHECK-NEXT: .reg .b32 %r<3>;
41-
; CHECK-NEXT: .reg .f32 %f<2>;
42-
; CHECK-NEXT: .reg .b64 %rd<4>;
43-
; CHECK-EMPTY:
44-
; CHECK-NEXT: // %bb.0:
45-
; CHECK-NEXT: ld.param.u64 %rd1, [bar_param_0];
46-
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
47-
; CHECK-NEXT: ld.param.u32 %r1, [bar_param_1];
48-
; CHECK-NEXT: suld.b.1d.b32.trap {%r2}, [surf0, {%r1}];
49-
; CHECK-NEXT: cvt.rn.f32.s32 %f1, %r2;
50-
; CHECK-NEXT: st.global.f32 [%rd2], %f1;
51-
; CHECK-NEXT: ret;
34+
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
5235
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
36+
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
37+
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
5338
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
39+
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
40+
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
5441
%ret = sitofp i32 %val to float
42+
; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
43+
; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[REDF]]
5544
store float %ret, ptr %red
5645
ret void
5746
}

llvm/test/CodeGen/NVPTX/surf-tex.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
2-
# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
2+
# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
33
# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
44

55
# We only need to run this a second time for texture tests, because
66
# there is a difference between unified and non-unified intrinsics.
77
#
88
# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
9-
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
9+
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
1010
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
1111

1212
# Verify that all instructions and intrinsics defined in TableGen
@@ -269,7 +269,9 @@ def gen_suld_tests(target, global_surf):
269269
ret void
270270
}
271271
; CHECK-LABEL: .entry ${test_name}_global
272-
; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
272+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
273+
; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
274+
; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
273275
define void @${test_name}_global(${retty}* %ret, ${access}) {
274276
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
275277
%val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
@@ -312,6 +314,7 @@ def gen_suld_tests(target, global_surf):
312314
"reg_ret": get_ptx_vec_reg(vec, dtype),
313315
"reg_surf": get_ptx_surface(target),
314316
"reg_access": get_ptx_surface_access(geom),
317+
"reg_id": get_table_gen_id(),
315318
}
316319
gen_test(template, params)
317320
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -361,7 +364,9 @@ def gen_sust_tests(target, global_surf):
361364
ret void
362365
}
363366
; CHECK-LABEL: .entry ${test_name}_global
364-
; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
367+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
368+
; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
369+
; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
365370
define void @${test_name}_global(${value}, ${access}) {
366371
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
367372
tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
@@ -415,6 +420,7 @@ def gen_sust_tests(target, global_surf):
415420
"reg_value": get_ptx_vec_reg(vec, ctype),
416421
"reg_surf": get_ptx_surface(target),
417422
"reg_access": get_ptx_surface_access(geom),
423+
"reg_id": get_table_gen_id(),
418424
}
419425
gen_test(template, params)
420426
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -621,7 +627,9 @@ def gen_tex_tests(target, global_tex, global_sampler):
621627
ret void
622628
}
623629
; CHECK-LABEL: .entry ${test_name}_global
624-
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
630+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
631+
; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
632+
; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
625633
define void @${test_name}_global(${retty}* %ret, ${access}) {
626634
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
627635
${get_sampler_handle}
@@ -705,6 +713,7 @@ def gen_tex_tests(target, global_tex, global_sampler):
705713
"ptx_tex": get_ptx_texture(target),
706714
"ptx_access": get_ptx_texture_access(geom, ctype),
707715
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
716+
"reg_id": get_table_gen_id(),
708717
}
709718
gen_test(template, params)
710719
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -805,7 +814,9 @@ def gen_tld4_tests(target, global_tex, global_sampler):
805814
ret void
806815
}
807816
; CHECK-LABEL: .entry ${test_name}_global
808-
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
817+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
818+
; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
819+
; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
809820
define void @${test_name}_global(${retty}* %ret, ${access}) {
810821
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
811822
${get_sampler_handle}
@@ -851,6 +862,7 @@ def gen_tld4_tests(target, global_tex, global_sampler):
851862
"ptx_tex": get_ptx_texture(target),
852863
"ptx_access": get_ptx_tld4_access(geom),
853864
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
865+
"reg_id": get_table_gen_id(),
854866
}
855867
gen_test(template, params)
856868
generated_items.append((params["intrinsic"], params["instruction"]))

0 commit comments

Comments
 (0)