9
9
#include " indexer/Path.h"
10
10
11
11
#include " absl/algorithm/container.h"
12
+ #include " absl/container/flat_hash_map.h"
12
13
#include " absl/container/flat_hash_set.h"
13
14
#include " absl/strings/ascii.h"
14
15
#include " absl/strings/strip.h"
@@ -231,6 +232,127 @@ struct GccToolchainInfo : public ToolchainInfo {
231
232
}
232
233
};
233
234
235
/// How many command-line tokens an NVCC-specific option occupies when its
/// value (if any) is not attached with '='.
enum class NvccOptionType {
  /// The option is a bare flag; it stands alone on the command line.
  NoArgument,
  /// The option is followed by exactly one value.
  OneArgument,
};
239
+
240
+ // Based on nvcc --help from nvcc version V12.2.140
241
+ // Build cuda_12.2.r12.2/compiler.33191640_0
242
+
243
+ // clang-format off
244
/// NVCC options that take no operand. NvccToolchainInfo's constructor
/// registers each entry as NvccOptionType::NoArgument, so a matching
/// argument is dropped on its own.
///
/// Both the long (`--name`) and short (`-name`) spellings are listed where
/// this table covers them.
constexpr const char *skipOptionsNoArgs[] = {
  "--cuda", "-cuda",
  "--cubin", "-cubin",
  "--fatbin", "-fatbin",
  "--ptx", "-ptx",
  "--optix-ir", "-optix-ir",
  "--generate-dependencies", // clang uses --dependencies,
  "--compile",
  "--device-c", "-dc",
  "--device-w", "-dw",
  "--device-link", "-dlink",
  "--link", "-link",
  "--lib", "-lib",
  "--run", "-run",
  "--allow-unsupported-compiler",
  "--use-local-env", "-use-local-env",
  "--profile", "-pg",
  "--debug",
  "--device-debug", "-G",
  "--generate-line-info",
  "--dlink-time-opt", "-dlto",
  "--gen-opt-lto", "-gen-opt-lto",
  "--no-host-device-initializer-list", "-nohdinitlist",
  "--no-host-device-move-forward", "-nohdmvforward",
  "--expt-relaxed-constexpr", "-expt-relaxed-constexpr",
  "--extended-lambda", "-extended-lambda",
  "--expt-extended-lambda", "-expt-extended-lambda",
  "--m64", "-m64",
  "--forward-unknown-to-host-compiler", "-forward-unknown-to-host-compiler",
  "--forward-unknown-opts", "-forward-unknown-opts",
  "--keep", "-keep",
  "--save-temps", "-save-temps",
  "--no-align-double", "-no-align-double",
  "--no-device-link", "-nodlink",
  "--extra-device-vectorization", "-extra-device-vectorization",
  "--disable-warnings", "-w",
  "--keep-device-functions", "-keep-device-functions",
  "--source-in-ptx", "-src-in-ptx",
  "--restrict", "-restrict",
  "--Wreorder", "-Wreorder",
  "--Wdefault-stream-launch", "-Wdefault-stream-launch",
  "--Wmissing-launch-bounds", "-Wmissing-launch-bounds",
  "--Wext-lambda-captures-this", "-Wext-lambda-captures-this",
  "--Wno-deprecated-declarations", "-Wno-deprecated-declarations",
  "--Wno-deprecated-gpu-targets", "-Wno-deprecated-gpu-targets",
  "--resource-usage", "-res-usage",
  "--extensible-whole-program", "-ewp",
  // --compress-all is undocumented, but assuming it is similar to
  // --no-compress
  "--compress-all", "-compress-all",
  "--no-compress", "-no-compress",
  "--compile-as-tools-patch", "-astoolspatch",
  "--display-error-number", "-err-no",
  "--no-display-error-number", "-no-err-no",
  "--augment-host-linker-script", "-aug-hls",
  "--host-relocatable-link", "-r",
  // Boolean flag: it must not swallow the token that follows it
  // (frequently the source file).
  "--use_fast_math", "-use_fast_math",
};

/// NVCC options that take exactly one operand. NvccToolchainInfo's
/// constructor registers each entry as NvccOptionType::OneArgument, so the
/// option is dropped together with its value, whether the value is attached
/// with '=' or supplied as the next argument.
constexpr const char *skipOptionsWithArgs[] = {
  "--cudart", "-cudart",
  "--cudadevrt", "-cudadevrt",
  "--libdevice-directory", "-ldir",
  "--target-directory", "-target-dir",
  // Takes a file operand; clang uses --output
  "--output-file",
  // These take a directory/executable/config operand which would otherwise
  // be left behind on the command line as a stray positional argument.
  "--compiler-bindir", "-ccbin",
  "--archiver-binary", "-arbin",
  "--qpp-config", "-qpp-config",
  "--optimization-info",
  "--optimize",
  "--dopt", "-dopt",
  "--machine", "-m",
  "--threads", "-t",
  "--split-compile", "-split-compile",
  "--keep-dir", "-keep-dir",
  // TODO: Strictly speaking, these could be inlined
  // and/or recursively processed, but ignore them for now.
  "--compiler-options", "-Xcompiler", "--options-file",
  // --fatbin-options is undocumented but I'm assuming it
  // behaves similar to the other *-options
  "--fatbin-options", "-Xfatbin",
  "--linker-options",
  "--archive-options", "-Xarchive",
  "--ptxas-options", "-Xptxas",
  "--nvlink-options", "-Xnvlink",
  "--time", "-time",
  "--run-args", "-run-args",
  "--input-drive-prefix", "-idp",
  "--dependency-drive-prefix", "-ddp",
  "--drive-prefix", "-dp",
  "-dependency-target-name",
  "--gpu-architecture",
  "--gpu-code", "-code",
  "--generate-code", "-gencode",
  "--relocatable-device-code", "-rdc",
  "--entries", "-e",
  "--maxrregcount", "-maxrregcount",
  "--ftz", "-ftz",
  "--prec-div", "-prec-div",
  "--prec-sqrt", "-prec-sqrt",
  "--fmad", "-fmad",
  "--default-stream", "-default-stream",
  "--Werror", "-Werror",
  "--diag-error", "-diag-error",
  "--diag-suppress", "-diag-suppress",
  "--diag-warn", "-diag-warn",
  "--host-linker-script", "-hls",
  "--brief-diagnostics", "-brief-diag",
};
353
+
354
+ // clang-format on
355
+
234
356
struct NvccToolchainInfo : public ToolchainInfo {
235
357
AbsolutePath cudaDir;
236
358
@@ -239,8 +361,19 @@ struct NvccToolchainInfo : public ToolchainInfo {
239
361
// / doesn't even construct the appropriate CUDAKernelCallExpr values.
240
362
std::unique_ptr<ClangToolchainInfo> clangInfo;
241
363
364
+ absl::flat_hash_map<std::string_view, NvccOptionType> toBeSkipped;
365
+
242
366
NvccToolchainInfo (AbsolutePath cudaDir)
243
367
: ToolchainInfo(), cudaDir(cudaDir), clangInfo(nullptr ) {
368
+ for (auto s : skipOptionsNoArgs) {
369
+ this ->toBeSkipped .emplace (std::string_view (s),
370
+ NvccOptionType::NoArgument);
371
+ }
372
+ for (auto s : skipOptionsWithArgs) {
373
+ this ->toBeSkipped .emplace (std::string_view (s),
374
+ NvccOptionType::OneArgument);
375
+ }
376
+
244
377
// TODO: In principle, we could pick up Clang from -ccbin but that
245
378
// requires more plumbing; it would require using the -ccbin arg
246
379
// as part of the hash map key for toolchainInfoMap. So instead,
@@ -279,8 +412,66 @@ struct NvccToolchainInfo : public ToolchainInfo {
279
412
return true ;
280
413
}
281
414
415
/// Verdict returned by handleArgument for a single command-line argument.
enum class ArgumentProcessing {
  /// Leave the argument on the command line.
  Keep,
  /// Remove just this argument.
  DropCurrent,
  /// Remove this argument and the one after it, but only when a following
  /// argument actually exists; otherwise remove nothing.
  DropCurrentAndNextIffBothPresent,
};
420
+
421
+ ArgumentProcessing handleArgument (const std::string &arg) const {
422
+ if (!arg.starts_with (' -' )) {
423
+ return ArgumentProcessing::Keep;
424
+ }
425
+ std::string_view substr = arg;
426
+ auto eqIndex = arg.find (' =' );
427
+ if (eqIndex != std::string::npos) {
428
+ substr = std::string_view (arg.data (), eqIndex);
429
+ }
430
+ auto it = this ->toBeSkipped .find (substr);
431
+ if (it == this ->toBeSkipped .end ()) {
432
+ return ArgumentProcessing::Keep;
433
+ }
434
+ switch (it->second ) {
435
+ case NvccOptionType::NoArgument:
436
+ return ArgumentProcessing::DropCurrent;
437
+ case NvccOptionType::OneArgument:
438
+ if (substr.size () == arg.size ()) {
439
+ return ArgumentProcessing::DropCurrentAndNextIffBothPresent;
440
+ }
441
+ return ArgumentProcessing::DropCurrent;
442
+ }
443
+ ENFORCE (false , " should've exited earlier" );
444
+ }
445
+
446
+ void removeUnknownArguments (std::vector<std::string> &commandLine) const {
447
+ absl::flat_hash_set<size_t > drop{};
448
+ for (size_t i = 0 ; i < commandLine.size (); ++i) {
449
+ switch (this ->handleArgument (commandLine[i])) {
450
+ case ArgumentProcessing::Keep:
451
+ continue ;
452
+ case ArgumentProcessing::DropCurrent:
453
+ drop.insert (i);
454
+ continue ;
455
+ case ArgumentProcessing::DropCurrentAndNextIffBothPresent:
456
+ if (i + 1 < commandLine.size ()) {
457
+ drop.insert (i);
458
+ drop.insert (i + 1 );
459
+ }
460
+ }
461
+ }
462
+ std::vector<std::string> tmp;
463
+ tmp.reserve (commandLine.size () - drop.size ());
464
+ for (size_t i = 0 ; i < commandLine.size (); ++i) {
465
+ if (!drop.contains (i)) {
466
+ tmp.push_back (std::move (commandLine[i]));
467
+ }
468
+ }
469
+ std::swap (tmp, commandLine);
470
+ }
471
+
282
472
virtual void
283
473
adjustCommandLine (std::vector<std::string> &commandLine) const override {
474
+ this ->removeUnknownArguments (commandLine);
284
475
commandLine.push_back (
285
476
fmt::format (" -isystem{}{}include" , this ->cudaDir .asStringRef (),
286
477
std::filesystem::path::preferred_separator));
0 commit comments