Skip to content

Commit 42d0aff

Browse files
committed
apply_fixes can parse report files from a DWYU execution log
For large workspaces discovering the DWYU report files by crawling the bazel-out directory can be quite slow due to an enormous amount of files and directories being present. To work around this, we enable the apply_fixes script to parse a log file containing the command line output of executing the DWYU aspect. This execution log is then parsed and the DWYU report paths deduced.
1 parent c8be082 commit 42d0aff

File tree

17 files changed

+280
-67
lines changed

17 files changed

+280
-67
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ You can see the full command line interface and more information about the scrip
248248
If the `apply_fixes` tool is not able to discover the report files, this can be caused by the `bazel-bin` convenience symlink at the workspace root not existing or not pointing to the output directory which was used to generate the report files.
249249
The tool offers options to control how the output directory is discovered.
250250

251+
Discovering the DWYU report files automatically can take a large amount of time if the `bazel-bin` directory is too large.
252+
In such cases you can pipe the command line output of executing the DWYU aspect into a file and forward this file to the apply_fixes script via the `--dwyu-log-file` option.
253+
The apply_fixes script will then deduce the DWYU report file locations without crawling through the whole `bazel-bin` directory.
254+
251255
Unfortunately, the tool cannot promise perfect results due to various constraints:
252256

253257
- If alias targets are involved, this cannot be processed properly.

src/analyze_includes/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def main(args: Namespace) -> int:
9999
system_under_inspection=system_under_inspection,
100100
ensure_private_deps=args.implementation_deps_available,
101101
)
102+
result.report = args.report
102103

103104
args.report.parent.mkdir(parents=True, exist_ok=True)
104105
with args.report.open(mode="w", encoding="utf-8") as report:

src/analyze_includes/result.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,22 @@
66
from typing import TYPE_CHECKING
77

88
if TYPE_CHECKING:
9+
from pathlib import Path
10+
911
from src.analyze_includes.parse_source import Include
1012

1113

1214
@dataclass
1315
class Result:
1416
target: str
17+
report: Path | None = None
18+
use_impl_deps: bool = False
19+
1520
public_includes_without_dep: list[Include] = field(default_factory=list)
1621
private_includes_without_dep: list[Include] = field(default_factory=list)
1722
unused_deps: list[str] = field(default_factory=list)
1823
unused_impl_deps: list[str] = field(default_factory=list)
1924
deps_which_should_be_private: list[str] = field(default_factory=list)
20-
use_impl_deps: bool = False
2125

2226
def is_ok(self) -> bool:
2327
return (
@@ -47,6 +51,9 @@ def to_str(self) -> str:
4751
if self.deps_which_should_be_private:
4852
msg += "\nPublic dependencies which are used only in private code:\n"
4953
msg += "\n".join(f" Dependency='{dep}'" for dep in self.deps_which_should_be_private)
54+
55+
msg += f"\n\nDWYU Report: {self.report}"
56+
5057
return self._framed_msg(msg)
5158

5259
def to_json(self) -> str:

src/analyze_includes/test/result_test.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import unittest
24
from pathlib import Path
35

@@ -7,14 +9,16 @@
79

810
class TestResult(unittest.TestCase):
911
@staticmethod
10-
def _expected_msg(target: str, errors: str = "") -> str:
12+
def _expected_msg(target: str, errors: str = "", report: str | None = None) -> str:
1113
border = 80 * "="
1214
msg = f"DWYU analyzing: '{target}'\n\n"
1315
if errors:
1416
msg += "Result: FAILURE\n\n"
17+
report = f"\n\nDWYU Report: {report}\n"
1518
else:
1619
msg += "Result: SUCCESS"
17-
return border + "\n" + msg + errors + "\n" + border
20+
report = "\n"
21+
return border + "\n" + msg + errors + report + border
1822

1923
def test_is_ok(self) -> None:
2024
unit = Result("//foo:bar")
@@ -33,6 +37,43 @@ def test_is_ok(self) -> None:
3337
"deps_which_should_be_private": [],
3438
"use_implementation_deps": false
3539
}
40+
""".lstrip(),
41+
)
42+
43+
def test_is_ok_fails_and_prints_report(self) -> None:
44+
unit = Result(
45+
target="//foo:bar",
46+
private_includes_without_dep=[Include(file=Path("foo"), include="missing")],
47+
)
48+
unit.report = Path("some/report.json")
49+
50+
self.assertFalse(unit.is_ok())
51+
self.assertEqual(
52+
unit.to_str(),
53+
self._expected_msg(
54+
target="//foo:bar",
55+
errors="Includes which are not available from the direct dependencies:"
56+
"\n File='foo', include='missing'",
57+
report="some/report.json",
58+
),
59+
)
60+
# The report is not mentioned in the json file as it would be redundant
61+
self.assertEqual(
62+
unit.to_json(),
63+
"""
64+
{
65+
"analyzed_target": "//foo:bar",
66+
"public_includes_without_dep": {},
67+
"private_includes_without_dep": {
68+
"foo": [
69+
"missing"
70+
]
71+
},
72+
"unused_deps": [],
73+
"unused_implementation_deps": [],
74+
"deps_which_should_be_private": [],
75+
"use_implementation_deps": false
76+
}
3677
""".lstrip(),
3778
)
3879

src/apply_fixes/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ py_library(
66
"apply_fixes.py",
77
"bazel_query.py",
88
"buildozer_executor.py",
9+
"get_dwyu_reports.py",
910
"search_missing_deps.py",
1011
"summary.py",
1112
"utils.py",

src/apply_fixes/apply_fixes.py

Lines changed: 4 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22

33
import json
44
import logging
5-
import shlex
6-
import sys
7-
from os import environ, walk
5+
from os import environ
86
from pathlib import Path
97
from typing import TYPE_CHECKING
108

119
from src.apply_fixes.bazel_query import BazelQuery
1210
from src.apply_fixes.buildozer_executor import BuildozerExecutor
11+
from src.apply_fixes.get_dwyu_reports import gather_reports, get_reports_search_dir
1312
from src.apply_fixes.search_missing_deps import search_missing_deps
14-
from src.apply_fixes.utils import execute_and_capture
13+
from src.apply_fixes.utils import args_string_to_list
1514

1615
if TYPE_CHECKING:
1716
from argparse import Namespace
@@ -29,10 +28,6 @@ def __init__(self, main_args: Namespace) -> None:
2928
self.add_missing_deps = main_args.fix_missing_deps or main_args.fix_all
3029

3130

32-
def args_string_to_list(args: str | None) -> list[str]:
33-
return shlex.split(args) if args else []
34-
35-
3631
def get_workspace(main_args: Namespace) -> Path | None:
3732
if main_args.workspace:
3833
return Path(main_args.workspace)
@@ -43,45 +38,6 @@ def get_workspace(main_args: Namespace) -> Path | None:
4338
return Path(workspace_root)
4439

4540

46-
def get_reports_search_dir(main_args: Namespace, workspace_root: Path) -> Path:
47-
"""
48-
Unless a dedicated search directory is provided, try to deduce the 'bazel-bin' dir.
49-
"""
50-
if main_args.search_path:
51-
return Path(main_args.search_path)
52-
53-
if main_args.use_bazel_info:
54-
process = execute_and_capture(
55-
cmd=[
56-
"bazel",
57-
*args_string_to_list(main_args.bazel_startup_args),
58-
"info",
59-
*args_string_to_list(main_args.bazel_args),
60-
"bazel-bin",
61-
],
62-
cwd=workspace_root,
63-
)
64-
return Path(process.stdout.strip())
65-
66-
bazel_bin_link = workspace_root / "bazel-bin"
67-
if not bazel_bin_link.is_dir():
68-
logging.fatal(f"ERROR: convenience symlink '{bazel_bin_link}' does not exist or is not a symlink.")
69-
sys.exit(1)
70-
return bazel_bin_link.resolve()
71-
72-
73-
def gather_reports(search_path: Path) -> list[Path]:
74-
"""
75-
We explicitly use os.walk() as it has better performance than Path.glob() in large and deeply nested file trees.
76-
"""
77-
reports = []
78-
for root, _, files in walk(search_path):
79-
for file in files:
80-
if file.endswith("_dwyu_report.json"):
81-
reports.append(Path(root) / file) # noqa: PERF401
82-
return reports
83-
84-
8541
def add_discovered_deps(
8642
discovered_public_deps: list[str],
8743
discovered_private_deps: list[str],
@@ -160,7 +116,7 @@ def main(args: Namespace) -> int:
160116
reports_search_dir = get_reports_search_dir(main_args=args, workspace_root=workspace)
161117
logging.debug(f"Reports search directory: '{reports_search_dir}'")
162118

163-
reports = gather_reports(reports_search_dir)
119+
reports = gather_reports(main_args=args, search_path=reports_search_dir)
164120
if not reports:
165121
logging.fatal(
166122
"""
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import sys
5+
from os import walk
6+
from pathlib import Path
7+
from typing import TYPE_CHECKING
8+
9+
from src.apply_fixes.utils import args_string_to_list, execute_and_capture
10+
11+
if TYPE_CHECKING:
12+
import argparse
13+
14+
15+
def gather_reports(main_args: argparse.Namespace, search_path: Path) -> list[Path]:
    """
    Collect the paths of the DWYU report files.

    If 'main_args.dwyu_log_file' is set, the report paths are deduced from this execution log instead of crawling
    'search_path'. Each logged path is cut after its first 'bin' directory and the remainder is appended to
    'search_path'. Logged paths without a 'bin' directory cannot be mapped and are skipped with a warning.

    Otherwise all files below 'search_path' whose name ends with '_dwyu_report.json' are returned.
    """
    if main_args.dwyu_log_file:
        from platform import system

        bin_dir = "\\bin\\" if system() == "Windows" else "/bin/"
        reports = []
        for log in parse_dwyu_execution_log(main_args.dwyu_log_file):
            _, separator, path_below_bin = log.partition(bin_dir)
            if not separator:
                # Without the 'bin' directory there is no way to relate the logged path to 'search_path'
                logging.warning(f"Ignoring DWYU report path without a '{bin_dir}' directory: '{log}'")
                continue
            reports.append(search_path / path_below_bin)
        return reports

    reports = []
    # We explicitly use os.walk() as it has better performance than Path.glob() in large and deeply nested file trees.
    for root, _, files in walk(search_path):
        for file in files:
            if file.endswith("_dwyu_report.json"):
                reports.append(Path(root) / file)  # noqa: PERF401
    return reports
29+
30+
31+
def parse_dwyu_execution_log(log_file: Path) -> list[str]:
    """
    Extract the DWYU report paths from a file containing the command line output of executing the DWYU aspect.

    Only lines starting with 'DWYU Report: ' are considered. Everything following this anchor, without trailing
    whitespace, is returned as report path. The order of the log file is preserved.
    """
    dwyu_report_anchor = "DWYU Report: "
    anchor_length = len(dwyu_report_anchor)
    with log_file.open() as log:
        # Iterate the file lazily, execution logs of large workspaces can be big.
        # Slice the anchor off instead of splitting at it, so a path is not truncated if it contains the anchor text.
        return [line.strip()[anchor_length:] for line in log if line.startswith(dwyu_report_anchor)]
37+
38+
39+
def get_reports_search_dir(main_args: argparse.Namespace, workspace_root: Path) -> Path:
    """
    Determine the directory in which the DWYU report files are searched.

    The following options are checked in this order:
    1. 'main_args.search_path' is returned as it is.
    2. If 'main_args.use_bazel_info' is set, 'bazel info bazel-bin' is executed in 'workspace_root' and its output is
       returned.
    3. The 'bazel-bin' convenience symlink at 'workspace_root' is resolved.

    The script exits with code 1 if no directory can be determined.
    """
    if main_args.search_path:
        return Path(main_args.search_path)

    if main_args.use_bazel_info:
        process = execute_and_capture(
            cmd=[
                "bazel",
                *args_string_to_list(main_args.bazel_startup_args),
                "info",
                *args_string_to_list(main_args.bazel_args),
                "bazel-bin",
            ],
            cwd=workspace_root,
        )
        bazel_bin = process.stdout.strip()
        if not bazel_bin:
            # An empty path would silently point to the current working directory
            logging.fatal("ERROR: 'bazel info bazel-bin' did not report an output directory.")
            sys.exit(1)
        return Path(bazel_bin)

    bazel_bin_link = workspace_root / "bazel-bin"
    if not bazel_bin_link.is_dir():
        logging.fatal(f"ERROR: convenience symlink '{bazel_bin_link}' does not exist.")
        sys.exit(1)
    return bazel_bin_link.resolve()

src/apply_fixes/main.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import sys
33
from argparse import ArgumentParser, Namespace, RawDescriptionHelpFormatter
4+
from pathlib import Path
45

56
from src.apply_fixes.apply_fixes import main
67

@@ -79,6 +80,20 @@ def cli() -> Namespace:
7980
deduce the Bazel output directory containing the DWYU report files. Or if you want to search only in a sub tree
8081
of the Bazel output directories.""",
8182
)
83+
parser.add_argument(
84+
"--dwyu-log-file",
85+
metavar="PATH",
86+
type=Path,
87+
help="""
88+
If discovering the DWYU report files in the bazel-bin is not feasible, one can instead pipe the command line
89+
output of executing the DWYU aspect into a log file and tell this script to extract the DWYU report paths from
90+
this execution log. This can be helpful when your workspace is so large, that crawling the corresponding
91+
'bazel-bin' directory is too slow for a satisfactory user experience. This script still has to be able to
92+
discover the location of the 'bazel-bin' directory. Meaning, the 'bazel-bin' convenience symlink at the
93+
workspace root should exists or if it is not available one of the following options should be used:
94+
['--use-bazel-info', '--search-path']. Please note when using '--search-path' you have to point exactly to the
95+
'bazel-bin' directory and can't point so sub directories.""",
96+
)
8297
parser.add_argument(
8398
"--use-cquery",
8499
action="store_true",
@@ -140,6 +155,10 @@ def cli() -> Namespace:
140155
logging.fatal("Please choose at least one of the 'fix-..' options")
141156
sys.exit(1)
142157

158+
if args.use_bazel_info and args.search_path:
159+
logging.fatal("Please choose only one options controlling the 'bazel-bin' directory discovery.")
160+
sys.exit(1)
161+
143162
return args
144163

145164

src/apply_fixes/test/BUILD

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ py_test(
1212
deps = ["//src/apply_fixes:lib"],
1313
)
1414

15+
py_test(
16+
name = "get_dwyu_reports_test",
17+
srcs = ["get_dwyu_reports_test.py"],
18+
deps = ["//src/apply_fixes:lib"],
19+
)
20+
1521
py_test(
1622
name = "search_missing_deps",
1723
srcs = ["search_missing_deps.py"],
@@ -23,3 +29,9 @@ py_test(
2329
srcs = ["summary_test.py"],
2430
deps = ["//src/apply_fixes:lib"],
2531
)
32+
33+
py_test(
34+
name = "utils_test",
35+
srcs = ["utils_test.py"],
36+
deps = ["//src/apply_fixes:lib"],
37+
)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import unittest
2+
from pathlib import Path
3+
4+
from src.apply_fixes.get_dwyu_reports import parse_dwyu_execution_log
5+
6+
7+
class TestParseDwyuExecutionLog(unittest.TestCase):
    """
    Tests for extracting the DWYU report paths from a DWYU execution log.
    """

    def setUp(self) -> None:
        # Create the log file in a temporary directory which is removed after each test. This keeps the tests
        # independent of each other and avoids leaving files behind in the working directory.
        import tempfile

        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        self.test_log = Path(tmp_dir.name) / "test_log.txt"

    def test_parse_dwyu_execution_log(self) -> None:
        with self.test_log.open(mode="wt") as fp:
            fp.write(
                """
Some unrelated stuff
DWYU Report: bazel-out/opt/bin/some/target_dwyu_report.json
ERROR: Unrelated error
DWYU Report: bazel-out/opt/bin/root_target_dwyu_report.json
""".strip()
            )

        logs = parse_dwyu_execution_log(self.test_log)
        self.assertEqual(
            logs, ["bazel-out/opt/bin/some/target_dwyu_report.json", "bazel-out/opt/bin/root_target_dwyu_report.json"]
        )

    def test_parse_dwyu_execution_log_empty(self) -> None:
        with self.test_log.open(mode="wt") as fp:
            fp.write("")

        logs = parse_dwyu_execution_log(self.test_log)
        self.assertEqual(logs, [])
32+
33+
34+
if __name__ == "__main__":
35+
unittest.main()

0 commit comments

Comments
 (0)