Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 79b4be7

Browse files
author
Sven Verdoolaege
committed
only introduce __ldg() calls on accesses to global memory
__ldg() only makes sense on accesses to global memory. It is not immediately obvious whether such a call would ever be introduced on accesses to promoted tensors, i.e., whether accesses to read-only tensors would ever get promoted, but it is better to be safe than sorry and therefore to make sure __ldg() calls are never introduced on accesses to promoted tensors.
1 parent fed4151 commit 79b4be7

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

tc/core/polyhedral/cuda/codegen.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,16 @@ struct LdgWrapper {
398398

399399
template <typename AFF>
400400
void emitAccess(AFF access, const CodegenStatementContext& context) {
401-
LdgWrapper ldgWrapper(context, access.get_tuple_id(isl::dim_type::out));
402401
context.ss << context.build().access_from(access).to_C_str();
403402
}
403+
404+
// Print an access to global memory, wrapping the access in an "__ldg()"
405+
// call if the accessed tensor is known to be read-only.
406+
template <typename AFF>
407+
void emitGlobalAccess(AFF access, const CodegenStatementContext& context) {
408+
LdgWrapper ldgWrapper(context, access.get_tuple_id(isl::dim_type::out));
409+
emitAccess(access, context);
410+
}
404411
} // namespace
405412

406413
void emitCopyStmt(const CodegenStatementContext& context) {
@@ -414,9 +421,9 @@ void emitCopyStmt(const CodegenStatementContext& context) {
414421
if (isRead) {
415422
emitAccess(isl::multi_pw_aff(promoted), context);
416423
context.ss << " = ";
417-
emitAccess(isl::multi_pw_aff(original), context);
424+
emitGlobalAccess(isl::multi_pw_aff(original), context);
418425
} else {
419-
emitAccess(isl::multi_pw_aff(original), context);
426+
emitGlobalAccess(isl::multi_pw_aff(original), context);
420427
context.ss << " = ";
421428
emitAccess(isl::multi_pw_aff(promoted), context);
422429
}

0 commit comments

Comments
 (0)