Skip to content

Commit 15d2c1c

Browse files
Refactor mention parsing for clarity and maintainability
1 parent 8012cfc commit 15d2c1c

File tree

4 files changed

+345
-186
lines changed

4 files changed

+345
-186
lines changed

src/django_github_app/mentions.py

Lines changed: 132 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ def all_events(cls) -> list[EventAction]:
4848

4949
@classmethod
5050
def from_event(cls, event: sansio.Event) -> MentionScope | None:
51-
"""Determine the scope of a GitHub event based on its type and context."""
5251
if event.event == "issue_comment":
5352
issue = event.data.get("issue", {})
5453
is_pull_request = (
@@ -65,128 +64,134 @@ def from_event(cls, event: sansio.Event) -> MentionScope | None:
6564

6665

6766
@dataclass
68-
class Mention:
67+
class RawMention:
68+
match: re.Match[str]
6969
username: str
70-
text: str
7170
position: int
72-
line_number: int
73-
line_text: str
74-
match: re.Match[str] | None = None
75-
previous_mention: Mention | None = None
76-
next_mention: Mention | None = None
71+
end: int
7772

7873

79-
def check_pattern_match(
80-
text: str, pattern: str | re.Pattern[str] | None
81-
) -> re.Match[str] | None:
82-
"""Check if text matches the given pattern (string or regex).
74+
CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```", re.MULTILINE)
75+
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")
76+
BLOCKQUOTE_PATTERN = re.compile(r"^\s*>.*$", re.MULTILINE)
8377

84-
Returns Match object if pattern matches, None otherwise.
85-
If pattern is None, returns a dummy match object.
86-
"""
87-
if pattern is None:
88-
return re.match(r"(.*)", text, re.IGNORECASE | re.DOTALL)
8978

90-
# Check if it's a compiled regex pattern
91-
if isinstance(pattern, re.Pattern):
92-
# Use the pattern directly, preserving its flags
93-
return pattern.match(text)
79+
# GitHub username rules:
80+
# - 1-39 characters long
81+
# - Can only contain alphanumeric characters or hyphens
82+
# - Cannot start or end with a hyphen
83+
# - Cannot have multiple consecutive hyphens
84+
GITHUB_MENTION_PATTERN = re.compile(
85+
r"(?:^|(?<=\s))@([a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})",
86+
re.MULTILINE | re.IGNORECASE,
87+
)
9488

95-
# For strings, do exact match (case-insensitive)
96-
# Escape the string to treat it literally
97-
escaped_pattern = re.escape(pattern)
98-
return re.match(escaped_pattern, text, re.IGNORECASE)
9989

90+
def extract_all_mentions(text: str) -> list[RawMention]:
    """Find every ``@username`` mention in ``text`` outside code and quotes.

    Fenced code blocks, inline code spans and blockquote lines are each
    overwritten with a run of spaces of the same length before searching.
    Because the masked copy has the same length as ``text``, the offsets
    stored on each ``RawMention`` (``position`` / ``end``) can be used to
    index straight into the original ``text``.

    Returns the mentions in the order they occur in ``text``.
    """

    def _blank_out(hit: re.Match[str]) -> str:
        # same number of characters, none of which can form a mention
        return " " * len(hit.group(0))

    processed_text = text
    # order matters: fenced blocks are masked before inline code, then quotes
    for masked_region in (
        CODE_BLOCK_PATTERN,
        INLINE_CODE_PATTERN,
        BLOCKQUOTE_PATTERN,
    ):
        processed_text = masked_region.sub(_blank_out, processed_text)

    found: list[RawMention] = []
    for hit in GITHUB_MENTION_PATTERN.finditer(processed_text):
        found.append(
            RawMention(
                match=hit,
                username=hit.group(1),
                position=hit.start(),
                end=hit.end(),
            )
        )
    return found
100110

101-
CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```", re.MULTILINE)
102-
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")
103-
QUOTE_PATTERN = re.compile(r"^\s*>.*$", re.MULTILINE)
104111

112+
class LineInfo(NamedTuple):
113+
lineno: int
114+
text: str
105115

106-
def parse_mentions_for_username(
107-
event: sansio.Event, username_pattern: str | re.Pattern[str] | None = None
108-
) -> list[Mention]:
109-
comment = event.data.get("comment", {})
110-
if comment is None:
111-
comment = {}
112-
body = comment.get("body", "")
116+
@classmethod
117+
def for_mention_in_comment(cls, comment: str, mention_position: int):
118+
lines = comment.splitlines()
119+
text_before = comment[:mention_position]
120+
line_number = text_before.count("\n") + 1
113121

114-
if not body:
115-
return []
122+
line_index = line_number - 1
123+
line_text = lines[line_index] if line_index < len(lines) else ""
116124

117-
# If no pattern specified, use bot username (TODO: get from settings)
118-
if username_pattern is None:
119-
username_pattern = "bot" # Placeholder
125+
return cls(lineno=line_number, text=line_text)
120126

121-
# Handle regex patterns vs literal strings
122-
if isinstance(username_pattern, re.Pattern):
123-
# Use the pattern string directly, preserving any flags
124-
username_regex = username_pattern.pattern
125-
# Extract flags from the compiled pattern
126-
flags = username_pattern.flags | re.MULTILINE | re.IGNORECASE
127-
else:
128-
# For strings, escape them to be treated literally
129-
username_regex = re.escape(username_pattern)
130-
flags = re.MULTILINE | re.IGNORECASE
131127

132-
original_body = body
133-
original_lines = original_body.splitlines()
128+
def extract_mention_text(
129+
body: str, current_index: int, all_mentions: list[RawMention], mention_end: int
130+
) -> str:
131+
text_start = mention_end
134132

135-
processed_text = CODE_BLOCK_PATTERN.sub(lambda m: " " * len(m.group(0)), body)
136-
processed_text = INLINE_CODE_PATTERN.sub(
137-
lambda m: " " * len(m.group(0)), processed_text
138-
)
139-
processed_text = QUOTE_PATTERN.sub(lambda m: " " * len(m.group(0)), processed_text)
133+
# Find next @mention (any mention, not just matched ones) to know where this text ends
134+
next_mention_index = None
135+
for j in range(current_index + 1, len(all_mentions)):
136+
next_mention_index = j
137+
break
140138

141-
# Use \S+ to match non-whitespace characters for username
142-
# Special handling for patterns that could match too broadly
143-
if ".*" in username_regex:
144-
# Replace .* with a more specific pattern that won't match spaces or @
145-
username_regex = username_regex.replace(".*", r"[^@\s]*")
139+
if next_mention_index is not None:
140+
text_end = all_mentions[next_mention_index].position
141+
else:
142+
text_end = len(body)
146143

147-
mention_pattern = re.compile(
148-
rf"(?:^|(?<=\s))@({username_regex})(?:\s|$|(?=[^\w\-]))",
149-
flags,
150-
)
144+
return body[text_start:text_end].strip()
151145

152-
mentions: list[Mention] = []
153146

154-
for match in mention_pattern.finditer(processed_text):
155-
position = match.start() # Position of @
156-
username = match.group(1) # Captured username
147+
@dataclass
class ParsedMention:
    """A mention whose username matched, plus the text and location around it."""

    # username captured after the "@"
    username: str
    # comment text attributed to this mention
    text: str
    # offset of the mention within the comment body
    position: int
    # line number and line text of the line the mention sits on
    line_info: LineInfo

    # filled in later, once the mention text has been matched against a pattern
    match: re.Match[str] | None = None

    # neighbouring mentions in the same comment; linked after all are collected
    previous_mention: ParsedMention | None = None
    next_mention: ParsedMention | None = None
157156

158-
text_before = original_body[:position]
159-
line_number = text_before.count("\n") + 1
160157

161-
line_index = line_number - 1
162-
line_text = (
163-
original_lines[line_index] if line_index < len(original_lines) else ""
164-
)
158+
def extract_mentions_from_event(
159+
event: sansio.Event, username_pattern: str | re.Pattern[str] | None = None
160+
) -> list[ParsedMention]:
161+
comment_data = event.data.get("comment", {})
162+
if comment_data is None:
163+
comment_data = {}
164+
comment = comment_data.get("body", "")
165165

166-
text_start = match.end()
166+
if not comment:
167+
return []
167168

168-
# Find next @mention to know where this text ends
169-
next_match = mention_pattern.search(processed_text, match.end())
170-
if next_match:
171-
text_end = next_match.start()
172-
else:
173-
text_end = len(original_body)
174-
175-
text = original_body[text_start:text_end].strip()
176-
177-
mention = Mention(
178-
username=username,
179-
text=text,
180-
position=position,
181-
line_number=line_number,
182-
line_text=line_text,
183-
match=None,
184-
previous_mention=None,
185-
next_mention=None,
186-
)
169+
# If no pattern specified, use bot username (TODO: get from settings)
170+
if username_pattern is None:
171+
username_pattern = "bot" # Placeholder
187172

188-
mentions.append(mention)
173+
mentions: list[ParsedMention] = []
174+
potential_mentions = extract_all_mentions(comment)
175+
for i, raw_mention in enumerate(potential_mentions):
176+
if not matches_pattern(raw_mention.username, username_pattern):
177+
continue
178+
179+
text = extract_mention_text(comment, i, potential_mentions, raw_mention.end)
180+
line_info = LineInfo.for_mention_in_comment(comment, raw_mention.position)
181+
182+
mentions.append(
183+
ParsedMention(
184+
username=raw_mention.username,
185+
text=text,
186+
position=raw_mention.position,
187+
line_info=line_info,
188+
match=None,
189+
previous_mention=None,
190+
next_mention=None,
191+
)
192+
)
189193

194+
# link mentions
190195
for i, mention in enumerate(mentions):
191196
if i > 0:
192197
mention.previous_mention = mentions[i - 1]
@@ -202,11 +207,10 @@ class Comment:
202207
author: str
203208
created_at: datetime
204209
url: str
205-
mentions: list[Mention]
210+
mentions: list[ParsedMention]
206211

207212
@property
208213
def line_count(self) -> int:
209-
"""Number of lines in the comment."""
210214
if not self.body:
211215
return 0
212216
return len(self.body.splitlines())
@@ -224,8 +228,7 @@ def from_event(cls, event: sansio.Event) -> Comment:
224228
if not comment_data:
225229
raise ValueError(f"Cannot extract comment from event type: {event.event}")
226230

227-
created_at_str = comment_data.get("created_at", "")
228-
if created_at_str:
231+
if created_at_str := comment_data.get("created_at", ""):
229232
# GitHub timestamps are in ISO format: 2024-01-01T12:00:00Z
230233
created_at_aware = datetime.fromisoformat(
231234
created_at_str.replace("Z", "+00:00")
@@ -253,9 +256,9 @@ def from_event(cls, event: sansio.Event) -> Comment:
253256

254257

255258
@dataclass
256-
class MentionEvent:
259+
class Mention:
257260
comment: Comment
258-
triggered_by: Mention
261+
mention: ParsedMention
259262
scope: MentionScope | None
260263

261264
@classmethod
@@ -271,7 +274,7 @@ def from_event(
271274
if scope is not None and event_scope != scope:
272275
return
273276

274-
mentions = parse_mentions_for_username(event, username)
277+
mentions = extract_mentions_from_event(event, username)
275278
if not mentions:
276279
return
277280

@@ -280,13 +283,36 @@ def from_event(
280283

281284
for mention in mentions:
282285
if pattern is not None:
283-
match = check_pattern_match(mention.text, pattern)
286+
match = get_match(mention.text, pattern)
284287
if not match:
285288
continue
286289
mention.match = match
287290

288291
yield cls(
289292
comment=comment,
290-
triggered_by=mention,
293+
mention=mention,
291294
scope=event_scope,
292295
)
296+
297+
298+
def matches_pattern(text: str, pattern: str | re.Pattern[str] | None) -> bool:
299+
match pattern:
300+
case None:
301+
return True
302+
case re.Pattern():
303+
return pattern.fullmatch(text) is not None
304+
case str():
305+
return text.strip().lower() == pattern.strip().lower()
306+
307+
308+
def get_match(text: str, pattern: str | re.Pattern[str] | None) -> re.Match[str] | None:
309+
match pattern:
310+
case None:
311+
return re.match(r"(.*)", text, re.IGNORECASE | re.DOTALL)
312+
case re.Pattern():
313+
# Use the pattern directly, preserving its flags
314+
return pattern.match(text)
315+
case str():
316+
# For strings, do exact match (case-insensitive)
317+
# Escape the string to treat it literally
318+
return re.match(re.escape(pattern), text, re.IGNORECASE)

src/django_github_app/routing.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from ._typing import override
1818
from .github import AsyncGitHubAPI
1919
from .github import SyncGitHubAPI
20-
from .mentions import MentionEvent
20+
from .mentions import Mention
2121
from .mentions import MentionScope
2222

2323
AsyncCallback = Callable[..., Awaitable[None]]
@@ -83,19 +83,19 @@ def decorator(func: CB) -> CB:
8383
async def async_wrapper(
8484
event: sansio.Event, gh: AsyncGitHubAPI, *args: Any, **kwargs: Any
8585
) -> None:
86-
for context in MentionEvent.from_event(
86+
for mention in Mention.from_event(
8787
event, username=username, pattern=pattern, scope=scope
8888
):
89-
await func(event, gh, *args, context=context, **kwargs) # type: ignore[func-returns-value]
89+
await func(event, gh, *args, context=mention, **kwargs) # type: ignore[func-returns-value]
9090

9191
@wraps(func)
9292
def sync_wrapper(
9393
event: sansio.Event, gh: SyncGitHubAPI, *args: Any, **kwargs: Any
9494
) -> None:
95-
for context in MentionEvent.from_event(
95+
for mention in Mention.from_event(
9696
event, username=username, pattern=pattern, scope=scope
9797
):
98-
func(event, gh, *args, context=context, **kwargs)
98+
func(event, gh, *args, context=mention, **kwargs)
9999

100100
wrapper: MentionHandler
101101
if iscoroutinefunction(func):

0 commit comments

Comments
 (0)