@@ -65,6 +65,17 @@ void checkOrCreateContext() {
65
65
}
66
66
67
67
namespace {
68
+ static void checkedSystemCall (
69
+ const std::string& cmd,
70
+ const std::vector<std::string>& args) {
71
+ std::stringstream command;
72
+ command << cmd << " " ;
73
+ for (const auto & s : args) {
74
+ command << s << " " ;
75
+ }
76
+ TC_CHECK_EQ (std::system (command.str ().c_str ()), 0 ) << command.str ();
77
+ }
78
+
68
79
static std::tuple<int , int , int > getCudaArchitecture () {
69
80
int device, major, minor;
70
81
CUdevice deviceHandle;
@@ -107,30 +118,48 @@ static std::string llvmCompile(
107
118
std::remove (outputPtxFile.c_str ());
108
119
});
109
120
110
- std::string cmdLlvmIr = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) +
111
- " /clang++ -x cuda " + inputFileName + " " + " --cuda-device-only " +
112
- " --cuda-gpu-arch=" + arch + " " +
113
- " --cuda-path=" + TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR) + " " + " -I" +
114
- TC_STRINGIFY (TC_CUDA_INCLUDE_DIR) + " " + " -I" +
115
- TC_STRINGIFY (TC_CUB_INCLUDE_DIR) + " " + tc::FLAGS_llvm_flags +
116
- " -DNVRTC_CUB=1 " + " -nocudalib -S -emit-llvm " + " -o " +
117
- outputClangFile;
118
- TC_CHECK_EQ (std::system (cmdLlvmIr.c_str ()), 0 ) << cmdLlvmIr;
119
-
120
- std::string cmdLlvmLink = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) +
121
- " /llvm-link " + outputClangFile + " " +
122
- TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR) +
123
- " /nvvm/libdevice/libdevice.*.bc " + " -S -o " + outputLinkFile;
124
- TC_CHECK_EQ (std::system (cmdLlvmLink.c_str ()), 0 ) << cmdLlvmLink;
125
-
126
- std::string cmdOpt = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /opt " +
127
- " -internalize -internalize-public-api-list=" + name + " " +
128
- " -nvvm-reflect -O3 " + outputLinkFile + " -S -o " + outputOptFile;
129
- TC_CHECK_EQ (std::system (cmdOpt.c_str ()), 0 ) << cmdOpt;
130
-
131
- std::string cmdPtx = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) +
132
- " /llc -mcpu=" + arch + " " + outputOptFile + " -o " + outputPtxFile;
133
- TC_CHECK_EQ (std::system (cmdPtx.c_str ()), 0 ) << cmdPtx;
121
+ // Compile
122
+ checkedSystemCall (
123
+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /clang++" ,
124
+ {" -x cuda " + inputFileName,
125
+ " --cuda-device-only" ,
126
+ std::string (" --cuda-gpu-arch=" ) + arch,
127
+ std::string (" --cuda-path=" ) + TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR),
128
+ std::string (" -I" ) + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR),
129
+ std::string (" -I" ) + TC_STRINGIFY (TC_CUB_INCLUDE_DIR),
130
+ tc::FLAGS_llvm_flags,
131
+ " -DNVRTC_CUB=1" ,
132
+ " -nocudalib" ,
133
+ " -S" ,
134
+ " -emit-llvm" ,
135
+ " -o " + outputClangFile});
136
+
137
+ // Link libdevice before opt
138
+ checkedSystemCall (
139
+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /llvm-link " ,
140
+ {outputClangFile,
141
+ std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) +
142
+ " /nvvm/libdevice/libdevice.*.bc" ,
143
+ " -S" ,
144
+ " -o " + outputLinkFile});
145
+
146
+ // Opt
147
+ checkedSystemCall (
148
+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /opt" ,
149
+ {" -internalize" ,
150
+ std::string (" -internalize-public-api-list=" ) + name,
151
+ " -nvvm-reflect" ,
152
+ " -O3" ,
153
+ outputLinkFile,
154
+ " -S" ,
155
+ std::string (" -o " ) + outputOptFile});
156
+
157
+ // Ptx
158
+ checkedSystemCall (
159
+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /llc" ,
160
+ {std::string (" -mcpu=" ) + arch,
161
+ outputOptFile,
162
+ std::string (" -o " ) + outputPtxFile});
134
163
135
164
std::ifstream stream (outputPtxFile);
136
165
return std::string (
@@ -160,12 +189,16 @@ static std::string nvccCompile(
160
189
// cstdio's std::remove to delete files
161
190
tc::ScopeGuard sgo ([&]() { std::remove (outputPtxFile.c_str ()); });
162
191
163
- std::string cmdPtx = std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) +
164
- " /bin/nvcc -x cu " + inputFileName + " --gpu-architecture=" + arch + " " +
165
- " --ptx " + " -I" + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR) + " " + " -I" +
166
- TC_STRINGIFY (TC_CUB_INCLUDE_DIR) + " " + tc::FLAGS_nvcc_flags + " -o " +
167
- outputPtxFile;
168
- TC_CHECK_EQ (std::system (cmdPtx.c_str ()), 0 ) << cmdPtx;
192
+ checkedSystemCall (
193
+ std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) + " /bin/nvcc" ,
194
+ {" -x cu" ,
195
+ inputFileName,
196
+ std::string (" --gpu-architecture=" ) + arch,
197
+ " --ptx" ,
198
+ std::string (" -I" ) + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR),
199
+ std::string (" -I" ) + TC_STRINGIFY (TC_CUB_INCLUDE_DIR),
200
+ tc::FLAGS_nvcc_flags,
201
+ std::string (" -o " ) + outputPtxFile});
169
202
170
203
std::ifstream stream (outputPtxFile);
171
204
return std::string (
0 commit comments