Skip to content

Commit c50d39f

Browse files
[PGO][Offload] Allow PGO flags to be used on GPU targets (#94268)
This pull request is the third part of an ongoing effort to extend PGO instrumentation to GPU device code and depends on #93365. This PR makes the following changes: - Allows PGO flags to be supplied to GPU targets - Pulls version global from device - Modifies `__llvm_write_custom_profile` and `lprofWriteDataImpl` to allow the PGO version to be overridden
1 parent de2a451 commit c50d39f

File tree

15 files changed

+241
-130
lines changed

15 files changed

+241
-130
lines changed

clang/lib/CodeGen/CodeGenPGO.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,6 +1357,9 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) {
13571357

13581358
IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility);
13591359
llvm::Triple TT(M.getTargetTriple());
1360+
if (TT.isAMDGPU() || TT.isNVPTX())
1361+
IRLevelVersionVariable->setVisibility(
1362+
llvm::GlobalValue::ProtectedVisibility);
13601363
if (TT.supportsCOMDAT()) {
13611364
IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage);
13621365
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6388,11 +6388,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
63886388
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
63896389
options::OPT_fno_convergent_functions);
63906390

6391-
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
6392-
// for sampling, overhead of call arc collection is way too high and there's
6393-
// no way to collect the output.
6394-
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
6395-
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
6391+
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
63966392

63976393
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
63986394

clang/test/Driver/cuda-no-pgo-or-coverage.cu

Lines changed: 0 additions & 33 deletions
This file was deleted.

compiler-rt/lib/profile/InstrProfiling.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target,
310310
const __llvm_profile_data *DataEnd,
311311
const char *CountersBegin,
312312
const char *CountersEnd, const char *NamesBegin,
313-
const char *NamesEnd);
313+
const char *NamesEnd,
314+
const uint64_t *VersionOverride);
314315

315316
/*!
316317
* This variable is defined in InstrProfilingRuntime.cpp as a hidden

compiler-rt/lib/profile/InstrProfilingBuffer.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
252252
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
253253
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
254254
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
255-
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
255+
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
256+
__llvm_profile_get_version());
256257
}

compiler-rt/lib/profile/InstrProfilingFile.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
12731273
return 0;
12741274
}
12751275

1276-
COMPILER_RT_USED int __llvm_write_custom_profile(
1277-
const char *Target, const __llvm_profile_data *DataBegin,
1278-
const __llvm_profile_data *DataEnd, const char *CountersBegin,
1279-
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
1276+
int __llvm_write_custom_profile(const char *Target,
1277+
const __llvm_profile_data *DataBegin,
1278+
const __llvm_profile_data *DataEnd,
1279+
const char *CountersBegin,
1280+
const char *CountersEnd, const char *NamesBegin,
1281+
const char *NamesEnd,
1282+
const uint64_t *VersionOverride) {
12801283
int ReturnValue = 0, FilenameLength, TargetLength;
12811284
char *FilenameBuf, *TargetFilename;
12821285
const char *Filename;
@@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile(
13581361
ProfDataWriter fileWriter;
13591362
initFileWriter(&fileWriter, OutputFile);
13601363

1364+
uint64_t Version = __llvm_profile_get_version();
1365+
if (VersionOverride)
1366+
Version = *VersionOverride;
1367+
13611368
/* Write custom data to the file */
1362-
ReturnValue = lprofWriteDataImpl(
1363-
&fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
1364-
lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
1369+
ReturnValue =
1370+
lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
1371+
CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
1372+
NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
13651373
closeFileObject(OutputFile);
13661374

13671375
// Restore SIGKILL.

compiler-rt/lib/profile/InstrProfilingInternal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
160160
VPDataReaderType *VPDataReader, const char *NamesBegin,
161161
const char *NamesEnd, const VTableProfData *VTableBegin,
162162
const VTableProfData *VTableEnd, const char *VNamesBegin,
163-
const char *VNamesEnd, int SkipNameDataWrite);
163+
const char *VNamesEnd, int SkipNameDataWrite,
164+
uint64_t Version);
164165

165166
/* Merge value profile data pointed to by SrcValueProfData into
166167
* in-memory profile counters pointed by to DstData. */

compiler-rt/lib/profile/InstrProfilingWriter.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
254254
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
255255
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
256256
const char *VNamesEnd = __llvm_profile_end_vtabnames();
257+
uint64_t Version = __llvm_profile_get_version();
257258
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
258259
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
259260
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
260-
VNamesBegin, VNamesEnd, SkipNameDataWrite);
261+
VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
261262
}
262263

263-
COMPILER_RT_VISIBILITY int
264-
lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
265-
const __llvm_profile_data *DataEnd,
266-
const char *CountersBegin, const char *CountersEnd,
267-
const char *BitmapBegin, const char *BitmapEnd,
268-
VPDataReaderType *VPDataReader, const char *NamesBegin,
269-
const char *NamesEnd, const VTableProfData *VTableBegin,
270-
const VTableProfData *VTableEnd, const char *VNamesBegin,
271-
const char *VNamesEnd, int SkipNameDataWrite) {
264+
COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
265+
ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
266+
const __llvm_profile_data *DataEnd, const char *CountersBegin,
267+
const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
268+
VPDataReaderType *VPDataReader, const char *NamesBegin,
269+
const char *NamesEnd, const VTableProfData *VTableBegin,
270+
const VTableProfData *VTableEnd, const char *VNamesBegin,
271+
const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
272272
/* Calculate size of sections. */
273273
const uint64_t DataSectionSize =
274274
__llvm_profile_get_data_size(DataBegin, DataEnd);
@@ -308,6 +308,7 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
308308
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
309309
#include "profile/InstrProfData.inc"
310310
}
311+
Header.Version = Version;
311312

312313
/* On WIN64, label differences are truncated 32-bit values. Truncate
313314
* CountersDelta to match. */

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,10 @@ createIRLevelProfileFlagVar(Module &M,
469469
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
470470
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
471471
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
472+
if (isGPUProfTarget(M))
473+
IRLevelVersionVariable->setVisibility(
474+
llvm::GlobalValue::ProtectedVisibility);
475+
472476
Triple TT(M.getTargetTriple());
473477
if (TT.supportsCOMDAT()) {
474478
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);

llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Header
22
//
33
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
4-
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
4+
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
55
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
66
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
77
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)

0 commit comments

Comments
 (0)