Skip to content

Commit e13230f

Browse files
committed
Merge branch 'rightlib' into merge-libs-250222-0050
2 parents 1041280 + 1d9cc11 commit e13230f

File tree

413 files changed

+7555
-2902
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

413 files changed

+7555
-2902
lines changed

build/conf/opensource.conf

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ when ($OPENSOURCE == "yes" && $EXPORT_GRADLE == "yes") {
4747
# Extra macros to control how cmake export works
4848

4949
when ($OPENSOURCE == "yes" && $EXPORT_CMAKE == "yes") {
50-
# Python version is not actually used in exported cmake's rigth now.
50+
# Python version is not actually used in exported cmake's right now.
5151
# The only reason to set it is to avoid any deps on contrib/python|contrib/libs/python when
5252
# exporting PY_*_MODULE and force dependency to build/platform/python there.
5353
USE_SYSTEM_PYTHON=3.10

build/plugins/_dart_fields.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -662,7 +662,7 @@ def cpp_configs(cls, unit, flat_args, spec_args):
662662
class LintExtraParams:
663663
KEY = 'LINT-EXTRA-PARAMS'
664664

665-
_CUSTOM_CLANG_FORMAT_BIN_ALLOWED_PATHS = ('ads', 'bigrt', 'grut')
665+
_CUSTOM_CLANG_FORMAT_BIN_ALLOWED_PATHS = consts.CUSTOM_CLANG_FORMAT_YT_ALLOWED_PATHS
666666

667667
@classmethod
668668
def from_macro_args(cls, unit, flat_args, spec_args):

build/plugins/lib/test_const/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -466,6 +466,8 @@ class DefaultLinterConfig(Enum):
466466
'build/internal/conf/autoincludes.json',
467467
)
468468

469+
CUSTOM_CLANG_FORMAT_YT_ALLOWED_PATHS = ('ads', 'bigrt', 'grut')
470+
469471

470472
class Status(object):
471473
GOOD, XFAIL, FAIL, XPASS, MISSING, CRASHED, TIMEOUT = range(1, 8)

build/scripts/link_dyn_lib.py

Lines changed: 0 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -129,26 +129,6 @@ def fix_windows_param(ex):
129129
return ['/DEF:{}'.format(def_file.name)]
130130

131131

132-
CUDA_LIBRARIES = {
133-
'-lcublas_static': '-lcublas',
134-
'-lcublasLt_static': '-lcublasLt',
135-
'-lcudart_static': '-lcudart',
136-
'-lcudnn_static': '-lcudnn',
137-
'-lcufft_static_nocallback': '-lcufft',
138-
'-lcurand_static': '-lcurand',
139-
'-lcusolver_static': '-lcusolver',
140-
'-lcusparse_static': '-lcusparse',
141-
'-lmyelin_compiler_static': '-lmyelin',
142-
'-lmyelin_executor_static': '-lnvcaffe_parser',
143-
'-lmyelin_pattern_library_static': '',
144-
'-lmyelin_pattern_runtime_static': '',
145-
'-lnvinfer_static': '-lnvinfer',
146-
'-lnvinfer_plugin_static': '-lnvinfer_plugin',
147-
'-lnvonnxparser_static': '-lnvonnxparser',
148-
'-lnvparsers_static': '-lnvparsers',
149-
}
150-
151-
152132
def fix_cmd(arch, c):
153133
if arch == 'WINDOWS':
154134
prefix = '/DEF:'
@@ -174,16 +154,6 @@ def do_fix(p):
174154
return sum((do_fix(x) for x in c), [])
175155

176156

177-
def fix_cmd_for_dynamic_cuda(cmd):
178-
flags = []
179-
for flag in cmd:
180-
if flag in CUDA_LIBRARIES:
181-
flags.append(CUDA_LIBRARIES[flag])
182-
else:
183-
flags.append(flag)
184-
return flags
185-
186-
187157
def parse_args(args):
188158
parser = optparse.OptionParser()
189159
parser.disable_interspersed_args()
@@ -231,13 +201,6 @@ def parse_args(args):
231201
cmd = args
232202
cmd = fix_cmd(opts.arch, cmd)
233203

234-
if opts.dynamic_cuda:
235-
cmd = fix_cmd_for_dynamic_cuda(cmd)
236-
else:
237-
cuda_manager = link_exe.CUDAManager(opts.cuda_architectures, opts.nvprune_exe)
238-
cmd = link_exe.process_cuda_libraries_by_nvprune(cmd, cuda_manager, opts.build_root)
239-
cmd = link_exe.process_cuda_libraries_by_objcopy(cmd, opts.build_root, opts.objcopy_exe)
240-
241204
cmd = ProcessWholeArchiveOption(opts.arch, opts.whole_archive_peers, opts.whole_archive_libs).construct_cmd(cmd)
242205
thinlto_cache.preprocess(opts, cmd)
243206

build/scripts/link_exe.py

Lines changed: 0 additions & 195 deletions
Original file line number | Diff line number | Diff line change
@@ -16,184 +16,6 @@
1616
from process_whole_archive_option import ProcessWholeArchiveOption
1717

1818

19-
CUDA_LIBRARIES = {
20-
'-lcublas_static': '-lcublas',
21-
'-lcublasLt_static': '-lcublasLt',
22-
'-lcudart_static': '-lcudart',
23-
'-lcudnn_static': '-lcudnn',
24-
'-lcudnn_adv_infer_static': '-lcudnn',
25-
'-lcudnn_adv_train_static': '-lcudnn',
26-
'-lcudnn_cnn_infer_static': '-lcudnn',
27-
'-lcudnn_cnn_train_static': '-lcudnn',
28-
'-lcudnn_ops_infer_static': '-lcudnn',
29-
'-lcudnn_ops_train_static': '-lcudnn',
30-
'-lcufft_static_nocallback': '-lcufft',
31-
'-lcupti_static': '-lcupti',
32-
'-lcurand_static': '-lcurand',
33-
'-lcusolver_static': '-lcusolver',
34-
'-lcusparse_static': '-lcusparse',
35-
'-lmyelin_compiler_static': '-lmyelin',
36-
'-lmyelin_executor_static': '-lnvcaffe_parser',
37-
'-lmyelin_pattern_library_static': '',
38-
'-lmyelin_pattern_runtime_static': '',
39-
'-lnvinfer_static': '-lnvinfer',
40-
'-lnvinfer_plugin_static': '-lnvinfer_plugin',
41-
'-lnvonnxparser_static': '-lnvonnxparser',
42-
'-lnvparsers_static': '-lnvparsers',
43-
'-lnvrtc_static': '-lnvrtc',
44-
'-lnvrtc-builtins_static': '-lnvrtc-builtins',
45-
'-lnvptxcompiler_static': '',
46-
'-lnppc_static': '-lnppc',
47-
'-lnppial_static': '-lnppial',
48-
'-lnppicc_static': '-lnppicc',
49-
'-lnppicom_static': '-lnppicom',
50-
'-lnppidei_static': '-lnppidei',
51-
'-lnppif_static': '-lnppif',
52-
'-lnppig_static': '-lnppig',
53-
'-lnppim_static': '-lnppim',
54-
'-lnppist_static': '-lnppist',
55-
'-lnppisu_static': '-lnppisu',
56-
'-lnppitc_static': '-lnppitc',
57-
'-lnpps_static': '-lnpps',
58-
}
59-
60-
61-
class CUDAManager:
62-
def __init__(self, known_arches, nvprune_exe):
63-
self.fatbin_libs = self._known_fatbin_libs(set(CUDA_LIBRARIES))
64-
65-
self.prune_args = []
66-
if known_arches:
67-
for arch in known_arches.split(':'):
68-
self.prune_args.append('-gencode')
69-
self.prune_args.append(self._arch_flag(arch))
70-
71-
self.nvprune_exe = nvprune_exe
72-
73-
def has_cuda_fatbins(self, cmd):
74-
return bool(set(cmd) & self.fatbin_libs)
75-
76-
@property
77-
def can_prune_libs(self):
78-
return self.prune_args and self.nvprune_exe
79-
80-
def _known_fatbin_libs(self, libs):
81-
libs_wo_device_code = {
82-
'-lcudart_static',
83-
'-lcupti_static',
84-
'-lnppc_static',
85-
}
86-
return set(libs) - libs_wo_device_code
87-
88-
def _arch_flag(self, arch):
89-
_, ver = arch.split('_', 1)
90-
return 'arch=compute_{},code={}'.format(ver, arch)
91-
92-
def prune_lib(self, inp_fname, out_fname):
93-
if self.prune_args:
94-
prune_command = [self.nvprune_exe] + self.prune_args + ['--output-file', out_fname, inp_fname]
95-
subprocess.check_call(prune_command)
96-
97-
def write_linker_script(self, f):
98-
# This script simply says:
99-
# * Place all `.nv_fatbin` input sections from all input files into one `.nv_fatbin` output section of output file
100-
# * Place it after `.bss` section
101-
#
102-
# Motivation can be found here: https://maskray.me/blog/2021-07-04-sections-and-overwrite-sections#insert-before-and-insert-after
103-
# TL;DR - we put section with a lot of GPU code directly after the last meaningful section in the binary
104-
# (which turns out to be .bss)
105-
# In that case, we decrease chances of relocation overflows from .text to .bss,
106-
# because now these sections are close to each other
107-
script = textwrap.dedent("""
108-
SECTIONS {
109-
.nv_fatbin : { *(.nv_fatbin) }
110-
} INSERT AFTER .bss
111-
""").strip()
112-
113-
f.write(script)
114-
115-
116-
def tmpdir_generator(base_path, prefix):
117-
for idx in itertools.count():
118-
path = os.path.abspath(os.path.join(base_path, prefix + '_' + str(idx)))
119-
os.makedirs(path)
120-
yield path
121-
122-
123-
def process_cuda_library_by_external_tool(cmd, build_root, tool_name, callable_tool_executor, allowed_cuda_libs):
124-
tmpdir_gen = tmpdir_generator(build_root, 'cuda_' + tool_name + '_libs')
125-
126-
new_flags = []
127-
cuda_deps = set()
128-
129-
# Because each directory flag only affects flags that follow it,
130-
# for correct pruning we need to process that in reversed order
131-
for flag in reversed(cmd):
132-
if flag in allowed_cuda_libs:
133-
cuda_deps.add('lib' + flag[2:] + '.a')
134-
flag += '_' + tool_name
135-
elif flag.startswith('-L') and os.path.exists(flag[2:]) and os.path.isdir(flag[2:]) and any(f in cuda_deps for f in os.listdir(flag[2:])):
136-
from_dirpath = flag[2:]
137-
from_deps = list(cuda_deps & set(os.listdir(from_dirpath)))
138-
139-
if from_deps:
140-
to_dirpath = next(tmpdir_gen)
141-
142-
for f in from_deps:
143-
from_path = os.path.join(from_dirpath, f)
144-
to_path = os.path.join(to_dirpath, f[:-2] + '_' + tool_name +'.a')
145-
callable_tool_executor(from_path, to_path)
146-
cuda_deps.remove(f)
147-
148-
# do not remove current directory
149-
# because it can contain other libraries we want link to
150-
# instead we just add new directory with processed by tool libs
151-
new_flags.append('-L' + to_dirpath)
152-
153-
new_flags.append(flag)
154-
155-
assert not cuda_deps, ('Unresolved CUDA deps: ' + ','.join(cuda_deps))
156-
return reversed(new_flags)
157-
158-
159-
def process_cuda_libraries_by_objcopy(cmd, build_root, objcopy_exe):
160-
if not objcopy_exe:
161-
return cmd
162-
163-
def run_objcopy(from_path, to_path):
164-
rename_section_command = [objcopy_exe, "--rename-section", ".ctors=.init_array", from_path, to_path]
165-
subprocess.check_call(rename_section_command)
166-
167-
possible_libraries = set(CUDA_LIBRARIES.keys())
168-
possible_libraries.update([
169-
'-lcudadevrt',
170-
'-lcufilt',
171-
'-lculibos',
172-
])
173-
possible_libraries.update([
174-
lib_name + "_pruner" for lib_name in possible_libraries
175-
])
176-
177-
return process_cuda_library_by_external_tool(list(cmd), build_root, 'objcopy', run_objcopy, possible_libraries)
178-
179-
180-
def process_cuda_libraries_by_nvprune(cmd, cuda_manager, build_root):
181-
if not cuda_manager.has_cuda_fatbins(cmd):
182-
return cmd
183-
184-
# add custom linker script
185-
to_dirpath = next(tmpdir_generator(build_root, 'cuda_linker_script'))
186-
script_path = os.path.join(to_dirpath, 'script')
187-
with open(script_path, 'w') as f:
188-
cuda_manager.write_linker_script(f)
189-
flags_with_linker = list(cmd) + ['-Wl,--script={}'.format(script_path)]
190-
191-
if not cuda_manager.can_prune_libs:
192-
return flags_with_linker
193-
194-
return process_cuda_library_by_external_tool(flags_with_linker, build_root, 'pruner', cuda_manager.prune_lib, cuda_manager.fatbin_libs)
195-
196-
19719
def remove_excessive_flags(cmd):
19820
flags = []
19921
for flag in cmd:
@@ -202,16 +24,6 @@ def remove_excessive_flags(cmd):
20224
return flags
20325

20426

205-
def fix_cmd_for_dynamic_cuda(cmd):
206-
flags = []
207-
for flag in cmd:
208-
if flag in CUDA_LIBRARIES:
209-
flags.append(CUDA_LIBRARIES[flag])
210-
else:
211-
flags.append(flag)
212-
return flags
213-
214-
21527
def remove_libs(cmd, libs):
21628
excluded_flags = ['-l{}'.format(lib) for lib in libs]
21729

@@ -270,13 +82,6 @@ def parse_args(args):
27082
cmd = args
27183
cmd = remove_excessive_flags(cmd)
27284

273-
if opts.dynamic_cuda:
274-
cmd = fix_cmd_for_dynamic_cuda(cmd)
275-
else:
276-
cuda_manager = CUDAManager(opts.cuda_architectures, opts.nvprune_exe)
277-
cmd = process_cuda_libraries_by_nvprune(cmd, cuda_manager, opts.build_root)
278-
cmd = process_cuda_libraries_by_objcopy(cmd, opts.build_root, opts.objcopy_exe)
279-
28085
if opts.exclude_libs:
28186
cmd = remove_libs(cmd, opts.exclude_libs)
28287

0 commit comments

Comments
 (0)