Skip to content

Commit 6032c95

Browse files
lestevethomasjpfanogrisel
authored
BLD Add Meson OpenMP checks (scikit-learn#29762)
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com> Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent 35f8159 commit 6032c95

File tree

5 files changed

+190
-13
lines changed

5 files changed

+190
-13
lines changed

azure-pipelines.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ jobs:
4040
- bash: |
4141
./build_tools/linting.sh
4242
displayName: Run linters
43+
- bash: |
44+
pip install ninja meson scipy
45+
python build_tools/check-meson-openmp-dependencies.py
46+
displayName: Run Meson OpenMP checks
47+
4348
4449
- template: build_tools/azure/posix.yml
4550
parameters:
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""
2+
Check that OpenMP dependencies are correctly defined in meson.build files.
3+
4+
This is based on trying to make sure the the following two things match:
5+
- the Cython files using OpenMP (based on a git grep regex)
6+
- the Cython extension modules that are built with OpenMP compiler flags (based
7+
on meson introspect json output)
8+
"""
9+
10+
import json
11+
import re
12+
import subprocess
13+
from pathlib import Path
14+
15+
16+
def has_source_openmp_flags(target_source):
17+
return any("openmp" in arg for arg in target_source["parameters"])
18+
19+
20+
def has_openmp_flags(target):
21+
"""Return whether target sources use OpenMP flags.
22+
23+
Make sure that both compiler and linker source use OpenMP.
24+
Look at `get_meson_info` docstring to see what `target` looks like.
25+
"""
26+
target_sources = target["target_sources"]
27+
28+
target_use_openmp_flags = any(
29+
has_source_openmp_flags(target_source) for target_source in target_sources
30+
)
31+
32+
if not target_use_openmp_flags:
33+
return False
34+
35+
# When the target use OpenMP we expect a compiler + linker source and we
36+
# want to make sure that both the compiler and the linker use OpenMP
37+
assert len(target_sources) == 2
38+
compiler_source, linker_source = target_sources
39+
assert "compiler" in compiler_source
40+
assert "linker" in linker_source
41+
42+
compiler_use_openmp_flags = any(
43+
"openmp" in arg for arg in compiler_source["parameters"]
44+
)
45+
linker_use_openmp_flags = any(
46+
"openmp" in arg for arg in linker_source["parameters"]
47+
)
48+
49+
assert compiler_use_openmp_flags == linker_use_openmp_flags
50+
return compiler_use_openmp_flags
51+
52+
53+
def get_canonical_name_meson(target, build_path):
54+
"""Return a name based on generated shared library.
55+
56+
The goal is to return a name that can be easily matched with the output
57+
from `git_grep_info`.
58+
59+
Look at `get_meson_info` docstring to see what `target` looks like.
60+
"""
61+
# Expect a list with one element with the name of the shared library
62+
assert len(target["filename"]) == 1
63+
shared_library_path = Path(target["filename"][0])
64+
shared_library_relative_path = shared_library_path.relative_to(
65+
build_path.absolute()
66+
)
67+
# Needed on Windows to match git grep output
68+
rel_path = shared_library_relative_path.as_posix()
69+
# OS-specific naming of the shared library .cpython- on POSIX and
70+
# something like .cp312- on Windows
71+
pattern = r"\.(cpython|cp\d+)-.+"
72+
return re.sub(pattern, "", str(rel_path))
73+
74+
75+
def get_canonical_name_git_grep(filename):
76+
"""Return name based on filename.
77+
78+
The goal is to return a name that can easily be matched with the output
79+
from `get_meson_info`.
80+
"""
81+
return re.sub(r"\.pyx(\.tp)?", "", filename)
82+
83+
84+
def get_meson_info():
85+
"""Return names of extension that use OpenMP based on meson introspect output.
86+
87+
The meson introspect json info is a list of targets where a target is a dict
88+
that looks like this (parts not used in this script are not shown for simplicity):
89+
{
90+
'name': '_k_means_elkan.cpython-312-x86_64-linux-gnu',
91+
'filename': [
92+
'<meson_build_dir>/sklearn/cluster/_k_means_elkan.cpython-312-x86_64-linux-gnu.so'
93+
],
94+
'target_sources': [
95+
{
96+
'compiler': ['ccache', 'cc'],
97+
'parameters': [
98+
'-Wall',
99+
'-std=c11',
100+
'-fopenmp',
101+
...
102+
],
103+
...
104+
},
105+
{
106+
'linker': ['cc'],
107+
'parameters': [
108+
'-shared',
109+
'-fPIC',
110+
'-fopenmp',
111+
...
112+
]
113+
}
114+
]
115+
}
116+
"""
117+
build_path = Path("build/introspect")
118+
subprocess.check_call(["meson", "setup", build_path, "--reconfigure"])
119+
120+
json_out = subprocess.check_output(
121+
["meson", "introspect", build_path, "--targets"], text=True
122+
)
123+
target_list = json.loads(json_out)
124+
meson_targets = [target for target in target_list if has_openmp_flags(target)]
125+
126+
return [get_canonical_name_meson(each, build_path) for each in meson_targets]
127+
128+
129+
def get_git_grep_info():
130+
"""Return names of extensions that use OpenMP based on git grep regex."""
131+
git_grep_filenames = subprocess.check_output(
132+
["git", "grep", "-lP", "cython.*parallel|_openmp_helpers"], text=True
133+
).splitlines()
134+
git_grep_filenames = [f for f in git_grep_filenames if ".pyx" in f]
135+
136+
return [get_canonical_name_git_grep(each) for each in git_grep_filenames]
137+
138+
139+
def main():
140+
from_meson = set(get_meson_info())
141+
from_git_grep = set(get_git_grep_info())
142+
143+
only_in_git_grep = from_git_grep - from_meson
144+
only_in_meson = from_meson - from_git_grep
145+
146+
msg = ""
147+
if only_in_git_grep:
148+
only_in_git_grep_msg = "\n".join(
149+
[f" {each}" for each in sorted(only_in_git_grep)]
150+
)
151+
msg += (
152+
"Some Cython files use OpenMP,"
153+
" but their meson.build is missing the openmp_dep dependency:\n"
154+
f"{only_in_git_grep_msg}\n\n"
155+
)
156+
157+
if only_in_meson:
158+
only_in_meson_msg = "\n".join([f" {each}" for each in sorted(only_in_meson)])
159+
msg += (
160+
"Some Cython files do not use OpenMP,"
161+
" you should remove openmp_dep from their meson.build:\n"
162+
f"{only_in_meson_msg}\n\n"
163+
)
164+
165+
if from_meson != from_git_grep:
166+
raise ValueError(
167+
f"Some issues have been found in Meson OpenMP dependencies:\n\n{msg}"
168+
)
169+
170+
171+
if __name__ == "__main__":
172+
main()

sklearn/cluster/meson.build

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,20 @@ cluster_extension_metadata = {
55
{'sources': ['_hierarchical_fast.pyx', metrics_cython_tree],
66
'override_options': ['cython_language=cpp']},
77
'_k_means_common':
8-
{'sources': ['_k_means_common.pyx']},
8+
{'sources': ['_k_means_common.pyx'], 'dependencies': [openmp_dep]},
99
'_k_means_lloyd':
10-
{'sources': ['_k_means_lloyd.pyx']},
10+
{'sources': ['_k_means_lloyd.pyx'], 'dependencies': [openmp_dep]},
1111
'_k_means_elkan':
12-
{'sources': ['_k_means_elkan.pyx']},
12+
{'sources': ['_k_means_elkan.pyx'], 'dependencies': [openmp_dep]},
1313
'_k_means_minibatch':
14-
{'sources': ['_k_means_minibatch.pyx']},
14+
{'sources': ['_k_means_minibatch.pyx'], 'dependencies': [openmp_dep]},
1515
}
1616

1717
foreach ext_name, ext_dict : cluster_extension_metadata
1818
py.extension_module(
1919
ext_name,
2020
[ext_dict.get('sources'), utils_cython_tree],
21-
dependencies: [np_dep, openmp_dep],
21+
dependencies: [np_dep] + ext_dict.get('dependencies', []),
2222
override_options : ext_dict.get('override_options', []),
2323
cython_args: cython_args,
2424
subdir: 'sklearn/cluster',

sklearn/ensemble/_hist_gradient_boosting/meson.build

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
hist_gradient_boosting_extension_metadata = {
2-
'_gradient_boosting': {'sources': ['_gradient_boosting.pyx']},
3-
'histogram': {'sources': ['histogram.pyx']},
4-
'splitting': {'sources': ['splitting.pyx']},
5-
'_binning': {'sources': ['_binning.pyx']},
6-
'_predictor': {'sources': ['_predictor.pyx']},
2+
'_gradient_boosting': {'sources': ['_gradient_boosting.pyx'], 'dependencies': [openmp_dep]},
3+
'histogram': {'sources': ['histogram.pyx'], 'dependencies': [openmp_dep]},
4+
'splitting': {'sources': ['splitting.pyx'], 'dependencies': [openmp_dep]},
5+
'_binning': {'sources': ['_binning.pyx'], 'dependencies': [openmp_dep]},
6+
'_predictor': {'sources': ['_predictor.pyx'], 'dependencies': [openmp_dep]},
77
'_bitset': {'sources': ['_bitset.pyx']},
88
'common': {'sources': ['common.pyx']},
99
}
@@ -12,7 +12,7 @@ foreach ext_name, ext_dict : hist_gradient_boosting_extension_metadata
1212
py.extension_module(
1313
ext_name,
1414
ext_dict.get('sources'),
15-
dependencies: [openmp_dep],
15+
dependencies: ext_dict.get('dependencies', []),
1616
cython_args: cython_args,
1717
subdir: 'sklearn/ensemble/_hist_gradient_boosting',
1818
install: true

sklearn/metrics/_pairwise_distances_reduction/meson.build

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ _datasets_pair_pyx = custom_target(
3939
_datasets_pair = py.extension_module(
4040
'_datasets_pair',
4141
_datasets_pair_pyx,
42-
dependencies: [np_dep, openmp_dep],
42+
dependencies: [np_dep],
4343
override_options: ['cython_language=cpp'],
4444
cython_args: cython_args,
4545
subdir: 'sklearn/metrics/_pairwise_distances_reduction',
@@ -94,7 +94,7 @@ _middle_term_computer_pyx = custom_target(
9494
_middle_term_computer = py.extension_module(
9595
'_middle_term_computer',
9696
_middle_term_computer_pyx,
97-
dependencies: [np_dep, openmp_dep],
97+
dependencies: [np_dep],
9898
override_options: ['cython_language=cpp'],
9999
cython_args: cython_args,
100100
subdir: 'sklearn/metrics/_pairwise_distances_reduction',

0 commit comments

Comments
 (0)