Skip to content

Commit 53420bc

Browse files
authored
Merge pull request #2161 from strictdoc-project/stanislaw/multiline_markers
backend/sdoc_source_code: parse multiline @relation markers
2 parents ecf6e0d + 02a486a commit 53420bc

File tree

12 files changed

+403
-63
lines changed

12 files changed

+403
-63
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import re
2+
from typing import Match
3+
4+
WS = "[ \t]"

# Comment markers that may open a line of a source code comment, each
# optionally preceded by horizontal whitespace:
#   "/**"          - start of a Doxygen block comment
#   "*" or "*/"    - continuation line or end of a block comment
#   "///" or "//"  - C/C++ line comments
#   "#", "##", ... - Python/shell-style line comments
# Compiled once at import time instead of on every call.
_COMMENT_MARKER_REGEX = re.compile(
    rf"^{WS}*(?:/\*\*|\*/?|///|//|#+)",
    flags=re.MULTILINE,
)


def preprocess_source_code_comment(comment: str) -> str:
    """
    Blank out Doxygen/C/C++/Python comment markers for processing.

    Every marker found at the beginning of a line (after optional spaces or
    tabs) is replaced with a run of spaces of exactly the same length. The
    returned string therefore has the same length and the same line breaks
    as the input, so character offsets computed on the result still point
    at the same line/column of the original comment.

    :param comment: Raw text of a comment, possibly spanning several lines.
    :return: The text with leading comment markers overwritten by spaces.
    """

    def replace_with_spaces(match: Match[str]) -> str:
        # Same length as the matched text, so that columns are preserved.
        return " " * len(match.group(0))

    return _COMMENT_MARKER_REGEX.sub(replace_with_spaces, comment)

strictdoc/backend/sdoc_source_code/marker_parser.py

Lines changed: 73 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import re
22
from typing import List, Optional, Union
33

4+
from strictdoc.backend.sdoc_source_code.helpers.comment_preprocessor import (
5+
preprocess_source_code_comment,
6+
)
47
from strictdoc.backend.sdoc_source_code.models.function_range_marker import (
58
FunctionRangeMarker,
69
)
@@ -12,9 +15,10 @@
1215

1316
# A requirement UID: a letter followed by one or more letters, digits or
# the characters "_", "/", ".", "\" and "-".
REGEX_REQ = r"[A-Za-z][A-Za-z0-9_\/\.\\-]+"
# A relation role: a letter followed by one or more letters, digits, "\" or "-".
REGEX_ROLE = r"[A-Za-z][A-Za-z0-9\\-]+"

# @relation(REQ-1, scope=function) or @relation{REQ-1, scope=function}
#
# The marker may span several lines: any whitespace, including newlines, is
# accepted after the opening bracket, after each comma and before the closing
# bracket.
#
# Groups:
#   1 - the list of requirement UIDs (including any leading whitespace),
#   2 - the scope keyword,
#   3 - the optional role.
#
# The marker must end with ")" or "}", optionally preceded by whitespace.
# The closing bracket is mandatory: a marker that is never closed must not
# match.
REGEX_MARKER = re.compile(
    rf"@relation[({{]"
    rf"(\s*{REGEX_REQ}(?:,\s+{REGEX_REQ})*)"
    r",\s+scope=(file|class|function|line|range_start|range_end)"
    rf"(?:,\s+role=({REGEX_ROLE}))?"
    r"\s*[)}]"
)
1923

2024

@@ -25,37 +29,77 @@ def parse(
2529
line_start: int,
2630
line_end: int,
2731
comment_line_start: int,
28-
comment_column_start: int,
2932
entity_name: Optional[str] = None,
3033
) -> List[Union[FunctionRangeMarker, RangeMarker, LineMarker]]:
34+
"""
35+
Parse relation markers from source file comments.
36+
37+
Before the actual parsing, the function removes all code comment symbols
38+
such as /** ... */ or /// Doxygen comments or Python # comments.
39+
40+
The line start/end indicate a range where the comment is located in the
41+
source file.
42+
The comment_line_start parameter may be the same as line_start but can
43+
also be different if the comment is part of a Python or C function in
44+
which case the comment_line_start will be several lines below the actual
45+
start of the range.
46+
"""
47+
3148
markers: List[Union[FunctionRangeMarker, RangeMarker, LineMarker]] = []
32-
for input_line_idx_, input_line_ in enumerate(
33-
input_string.splitlines()
34-
):
35-
match = REGEX_MARKER.search(input_line_)
36-
if match is None:
37-
continue
3849

39-
assert match.lastindex is not None
40-
marker_type = match.group(2)
41-
marker_role = match.group(3) if len(match.groups()) >= 3 else None
42-
req_list = match.group(1)
50+
input_string = preprocess_source_code_comment(input_string)
51+
52+
matches = REGEX_MARKER.finditer(input_string)
53+
for match_ in matches:
54+
assert match_.lastindex is not None
55+
marker_type = match_.group(2)
56+
marker_role = match_.group(3) if len(match_.groups()) >= 3 else None
57+
req_list = match_.group(1)
58+
59+
marker_start_index = match_.start(0)
60+
61+
marker_start_line = comment_line_start + input_string.count(
62+
"\n", 0, marker_start_index
63+
)
4364

44-
first_requirement_index = match.start(1)
65+
marker_line_start_index = input_string.rfind(
66+
"\n", 0, marker_start_index
67+
)
68+
marker_line_start_index = (
69+
0
70+
if marker_line_start_index == -1
71+
else marker_line_start_index + 1
72+
)
73+
74+
marker_start_column = (
75+
marker_start_index - marker_line_start_index
76+
) + 1
77+
78+
all_reqs_start_index = match_.start(1)
4579

46-
current_line = comment_line_start + input_line_idx_
47-
first_requirement_column = first_requirement_index + 1
48-
if input_line_idx_ == 0:
49-
first_requirement_column += comment_column_start - 1
5080
requirements = []
5181
for req_match in re.finditer(REGEX_REQ, req_list):
52-
req_item = req_match.group(0) # Matched REQ-XXX item
53-
# Calculate actual position relative to the original string
54-
start_index = (
55-
req_match.start()
56-
) # Offset by where group 1 starts
82+
req_start_index = all_reqs_start_index + req_match.start()
83+
last_newline_pos = input_string.rfind("\n", 0, req_start_index)
84+
85+
line_start_index = (
86+
0 if last_newline_pos == -1 else last_newline_pos + 1
87+
)
88+
89+
req_abs_line = comment_line_start + input_string.count(
90+
"\n", 0, req_start_index
91+
)
92+
93+
first_requirement_index = match_.start(1)
94+
first_requirement_column = (
95+
first_requirement_index - line_start_index
96+
) + 1
97+
98+
req_item = req_match.group(0)
99+
100+
start_index = req_match.start()
57101
requirement = Req(None, req_item)
58-
requirement.ng_source_line = current_line
102+
requirement.ng_source_line = req_abs_line
59103
requirement.ng_source_column = (
60104
first_requirement_column + start_index
61105
)
@@ -65,11 +109,10 @@ def parse(
65109
function_marker = FunctionRangeMarker(
66110
None, requirements, scope=marker_type, role=marker_role
67111
)
68-
function_marker.ng_source_line_begin = line_start
112+
function_marker.ng_source_line_begin = marker_start_line
113+
function_marker.ng_source_column_begin = marker_start_column
69114
function_marker.ng_range_line_begin = line_start
70115
function_marker.ng_range_line_end = line_end
71-
function_marker.ng_marker_line = current_line
72-
function_marker.ng_marker_column = first_requirement_column
73116
if marker_type == "file":
74117
function_marker.set_description("entire file")
75118
elif marker_type == "function":
@@ -85,16 +128,16 @@ def parse(
85128
requirements,
86129
role=marker_role,
87130
)
88-
range_marker.ng_source_line_begin = line_start
89-
range_marker.ng_source_column_begin = first_requirement_column
131+
range_marker.ng_source_line_begin = marker_start_line
132+
range_marker.ng_source_column_begin = marker_start_column
90133
range_marker.ng_range_line_begin = line_start
91134
range_marker.ng_range_line_end = line_end
92135
range_marker.ng_new_relation_keyword = True
93136
markers.append(range_marker)
94137
elif marker_type == "line":
95138
line_marker = LineMarker(None, requirements, role=marker_role)
96-
line_marker.ng_source_line_begin = line_start
97-
line_marker.ng_source_column_begin = first_requirement_column
139+
line_marker.ng_source_line_begin = marker_start_line
140+
line_marker.ng_source_column_begin = marker_start_column
98141
line_marker.ng_range_line_begin = line_start
99142
line_marker.ng_range_line_end = line_end
100143
markers.append(line_marker)

strictdoc/backend/sdoc_source_code/reader.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,6 @@ def line_marker_processor(line_marker: LineMarker, parse_context: ParseContext):
205205
def function_range_marker_processor(
206206
function_range_marker: FunctionRangeMarker, parse_context: ParseContext
207207
):
208-
location = get_location(function_range_marker)
209-
line = location["line"]
210-
column = location["col"]
211-
212208
if (
213209
len(parse_context.marker_stack) > 0
214210
and parse_context.marker_stack[-1].ng_is_nodoc
@@ -222,8 +218,6 @@ def function_range_marker_processor(
222218
function_range_marker.ng_range_line_end = (
223219
parse_context.file_stats.lines_total
224220
)
225-
function_range_marker.ng_marker_line = line
226-
function_range_marker.ng_marker_column = column
227221

228222
for req in function_range_marker.reqs:
229223
markers = parse_context.map_reqs_to_markers.setdefault(req, [])

strictdoc/backend/sdoc_source_code/reader_c.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ def read(
8080
if input_buffer[-1] == 10
8181
else node_.end_point[0] + 1,
8282
node_.start_point[0] + 1,
83-
node_.start_point[1] + 1,
8483
)
8584
for marker_ in markers:
8685
if not isinstance(marker_, FunctionRangeMarker):
@@ -179,7 +178,6 @@ def read(
179178
function_comment_node.start_point[0] + 1,
180179
function_last_line,
181180
function_comment_node.start_point[0] + 1,
182-
function_comment_node.start_point[1] + 1,
183181
entity_name=function_display_name,
184182
)
185183
for marker_ in markers:
@@ -278,7 +276,6 @@ def read(
278276
function_comment_node.start_point[0] + 1,
279277
function_last_line,
280278
function_comment_node.start_point[0] + 1,
281-
function_comment_node.start_point[1] + 1,
282279
entity_name=function_display_name,
283280
)
284281
for marker_ in markers:
@@ -327,7 +324,6 @@ def read(
327324
node_.start_point[0] + 1,
328325
node_.end_point[0] + 1,
329326
node_.start_point[0] + 1,
330-
node_.start_point[1] + 1,
331327
)
332328
for marker_ in markers:
333329
if isinstance(marker_, RangeMarker) and (

strictdoc/backend/sdoc_source_code/reader_python.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ def read(
106106
if input_buffer[-1] == 10
107107
else node_.end_point[0] + 1,
108108
string_content.start_point[0] + 1,
109-
string_content.start_point[1] + 1,
110109
)
111110
for marker_ in markers:
112111
if isinstance(marker_, FunctionRangeMarker) and (
@@ -165,7 +164,6 @@ def read(
165164
node_.start_point[0] + 1,
166165
node_.end_point[0] + 1,
167166
string_content.start_point[0] + 1,
168-
string_content.start_point[1] + 1,
169167
function_name,
170168
)
171169
for marker_ in markers:
@@ -230,7 +228,6 @@ def read(
230228
node_.start_point[0] + 1,
231229
node_.end_point[0] + 1,
232230
node_.start_point[0] + 1,
233-
node_.start_point[1] + 1,
234231
None,
235232
)
236233
for marker_ in markers:

strictdoc/core/file_traceability_index.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,11 +711,8 @@ def forward_function_marker_from_function(
711711
parent=None, reqs_objs=reqs, scope=marker_type.value
712712
)
713713
function_marker.ng_source_line_begin = function.line_begin
714-
function_marker.ng_source_column_begin = 1
715714
function_marker.ng_range_line_begin = function.line_begin
716715
function_marker.ng_range_line_end = function.line_end
717-
function_marker.ng_marker_line = function.line_begin
718-
function_marker.ng_marker_column = 1
719716
function_marker.role = role
720717
if description is not None:
721718
function_marker.set_description(description)
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# The tests in this file have strings with whitespace that must not be linted.
2+
# ruff: noqa: W291,W293
3+
from strictdoc.backend.sdoc_source_code.helpers.comment_preprocessor import (
4+
preprocess_source_code_comment,
5+
)
6+
7+
8+
def test_001_doxygen_slashes_and_stars():
9+
source_input = """\
10+
/**
11+
* @relation(REQ-1, scope=function)
12+
*/
13+
"""
14+
15+
preprocessed_comment = preprocess_source_code_comment(source_input)
16+
17+
# Note: There are invisible characters.
18+
assert (
19+
preprocessed_comment
20+
== """\
21+
22+
@relation(REQ-1, scope=function)
23+
24+
"""
25+
)
26+
27+
28+
def test_001_doxygen_three_slashes():
29+
source_input = """\
30+
///
31+
/// @relation(REQ-1, scope=function)
32+
///
33+
"""
34+
35+
preprocessed_comment = preprocess_source_code_comment(source_input)
36+
37+
# Note: There are invisible characters.
38+
assert (
39+
preprocessed_comment
40+
== """\
41+
42+
@relation(REQ-1, scope=function)
43+
44+
"""
45+
)
46+
47+
48+
def test_003_doxygen_two_slashes():
49+
source_input = """\
50+
//
51+
// @relation(REQ-1, scope=function)
52+
//
53+
"""
54+
55+
preprocessed_comment = preprocess_source_code_comment(source_input)
56+
57+
# Note: There are invisible characters.
58+
assert (
59+
preprocessed_comment
60+
== """\
61+
62+
@relation(REQ-1, scope=function)
63+
64+
"""
65+
)
66+
67+
68+
def test_004_python():
69+
source_input = """\
70+
#
71+
# @relation(REQ-1, scope=function)
72+
#
73+
"""
74+
75+
preprocessed_comment = preprocess_source_code_comment(source_input)
76+
77+
# Note: There are invisible characters.
78+
assert (
79+
preprocessed_comment
80+
== """\
81+
82+
@relation(REQ-1, scope=function)
83+
84+
"""
85+
)

0 commit comments

Comments
 (0)