|
1 | 1 | import json |
2 | | -import re |
3 | 2 |
|
| 3 | +import regex as re |
4 | 4 | import structlog |
5 | 5 | from litellm import ChatCompletionRequest |
6 | 6 |
|
|
19 | 19 | logger = structlog.get_logger("codegate") |
20 | 20 |
|
21 | 21 |
|
| 22 | +# Pre-compiled regex patterns for performance |
| 23 | +markdown_code_block = re.compile(r"```.*?```", flags=re.DOTALL) |
| 24 | +markdown_file_listing = re.compile(r"⋮...*?⋮...\n\n", flags=re.DOTALL) |
| 25 | +environment_details = re.compile(r"<environment_details>.*?</environment_details>", flags=re.DOTALL) |
| 26 | + |
| 27 | + |
22 | 28 | class CodegateContextRetriever(PipelineStep): |
23 | 29 | """ |
24 | 30 | Pipeline step that adds a context message to the completion request when it detects |
@@ -95,11 +101,9 @@ async def process( # noqa: C901 |
95 | 101 |
|
96 | 102 | # Remove code snippets and file listing from the user messages and search for bad packages |
97 | 103 | # in the rest of the user query/messages |
98 | | - user_messages = re.sub(r"```.*?```", "", user_message, flags=re.DOTALL) |
99 | | - user_messages = re.sub(r"⋮...*?⋮...\n\n", "", user_messages, flags=re.DOTALL) |
100 | | - user_messages = re.sub( |
101 | | - r"<environment_details>.*?</environment_details>", "", user_messages, flags=re.DOTALL |
102 | | - ) |
| 104 | + user_messages = markdown_code_block.sub("", user_message) |
| 105 | + user_messages = markdown_file_listing.sub("", user_messages) |
| 106 | + user_messages = environment_details.sub("", user_messages) |
103 | 107 |
|
104 | 108 | # split messages on <task> tags and newlines, to avoid passing too much content in the search |
105 | 109 | split_messages = re.split(r"</?task>|\n|\\n", user_messages) |
|
0 commit comments