Skip to content

Commit 72de5b3

Browse files
authored
chore: migrate to own glob parser (#2230)
1 parent 73616f4 commit 72de5b3

File tree

7 files changed

+123
-14
lines changed

7 files changed

+123
-14
lines changed

playwright/_impl/_glob.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright (c) Microsoft Corporation.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import re
15+
16+
# Characters that have a special meaning inside a regular expression and must
# therefore be backslash-escaped to be matched literally.
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping
escaped_chars = {"$", "^", "+", ".", "*", "(", ")", "|", "\\", "?", "{", "}", "[", "]"}


def glob_to_regex(glob: str) -> "re.Pattern[str]":
    """Translate a URL glob into an anchored, compiled regular expression.

    Supported syntax:

    * ``*`` matches any run of characters except ``/``.
    * Two or more ``*`` that are bounded on both sides by ``/`` or by the
      start/end of the glob match any number of ``/``-terminated segments;
      the ``/`` following the stars is consumed as part of the token.
      Any other run of stars behaves like a single ``*``.
    * ``?`` matches any single character.
    * ``[`` and ``]`` are passed through, so ``[a-z]`` is a regex character
      class.
    * ``{a,b}`` matches either alternative. Groups may be nested; commas
      outside of any group are literal.
    * ``\\`` escapes the following character. A trailing backslash is
      matched literally.
    * A ``}`` that does not close a group is matched literally.

    Args:
        glob: The glob pattern to translate.

    Returns:
        The compiled pattern, anchored with ``^`` and ``$``.

    Raises:
        re.error: If the translated pattern is not a valid regular
            expression, for example when a ``{`` group is never closed.
    """
    tokens = ["^"]
    # Depth of currently open "{...}" groups. A counter (rather than a flag)
    # keeps "," acting as an alternation separator after a nested group closes.
    group_depth = 0

    i = 0
    while i < len(glob):
        c = glob[i]
        if c == "\\" and i + 1 < len(glob):
            # Escaped character: emit it literally and skip over it.
            char = glob[i + 1]
            tokens.append("\\" + char if char in escaped_chars else char)
            i += 1
        elif c == "*":
            before_deep = glob[i - 1] if i > 0 else None
            # Collapse the whole run of consecutive stars; i ends on the last.
            star_count = 1
            while i + 1 < len(glob) and glob[i + 1] == "*":
                star_count += 1
                i += 1
            after_deep = glob[i + 1] if i + 1 < len(glob) else None
            is_deep = (
                star_count > 1
                and (before_deep == "/" or before_deep is None)
                and (after_deep == "/" or after_deep is None)
            )
            if is_deep:
                tokens.append("((?:[^/]*(?:/|$))*)")
                # The deep token already accounts for the separator that
                # follows the stars, so step over it.
                i += 1
            else:
                tokens.append("([^/]*)")
        elif c == "?":
            tokens.append(".")
        elif c == "[" or c == "]":
            tokens.append(c)
        elif c == "{":
            group_depth += 1
            tokens.append("(")
        elif c == "}":
            if group_depth > 0:
                group_depth -= 1
                tokens.append(")")
            else:
                # No group to close: a bare ")" would make the pattern
                # invalid, so match the brace literally instead.
                tokens.append("\\}")
        elif c == "," and group_depth > 0:
            tokens.append("|")
        else:
            tokens.append("\\" + c if c in escaped_chars else c)
        i += 1

    tokens.append("$")
    return re.compile("".join(tokens))

playwright/_impl/_helper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import asyncio
15-
import fnmatch
1615
import inspect
1716
import math
1817
import os
@@ -41,6 +40,7 @@
4140

4241
from playwright._impl._api_structures import NameValue
4342
from playwright._impl._errors import Error, TargetClosedError, TimeoutError
43+
from playwright._impl._glob import glob_to_regex
4444
from playwright._impl._str_utils import escape_regex_flags
4545

4646
if sys.version_info >= (3, 8): # pragma: no cover
@@ -149,7 +149,7 @@ def __init__(self, base_url: Union[str, None], match: URLMatch) -> None:
149149
if isinstance(match, str):
150150
if base_url and not match.startswith("*"):
151151
match = urljoin(base_url, match)
152-
regex = fnmatch.translate(match)
152+
regex = glob_to_regex(match)
153153
self._regex_obj = re.compile(regex)
154154
elif isinstance(match, Pattern):
155155
self._regex_obj = match

tests/async/test_browsercontext_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,9 @@ async def handler_with_header_mods(route: Route) -> None:
185185
await context.route("**/*", handler_with_header_mods)
186186

187187
await page.goto(server.EMPTY_PAGE)
188-
async with page.expect_request("/sleep.zzz") as request_info:
188+
with server.expect_request("/sleep.zzz") as server_request_info:
189189
await page.evaluate("() => fetch('/sleep.zzz')")
190-
request = await request_info.value
191-
values.append(request.headers.get("foo"))
190+
values.append(server_request_info.value.getHeader("foo"))
192191
assert values == ["bar", "bar", "bar"]
193192

194193

tests/async/test_interception.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import pytest
2222

23+
from playwright._impl._glob import glob_to_regex
2324
from playwright.async_api import (
2425
Browser,
2526
BrowserContext,
@@ -1041,3 +1042,47 @@ async def handle_request(route: Route) -> None:
10411042
assert response
10421043
assert response.status == 200
10431044
assert await response.json() == {"foo": "bar"}
1045+
1046+
1047+
async def test_glob_to_regex() -> None:
    """Exercise glob_to_regex with matching URLs, non-matching URLs and exact patterns."""
    js_url = "https://localhost:8080/foo.js"

    # (glob, url) pairs where the translated glob must match the url.
    should_match = [
        ("**/*.js", js_url),
        ("https://**/*.js", js_url),
        (
            "http://localhost:8080/simple/path.js",
            "http://localhost:8080/simple/path.js",
        ),
        (
            "http://localhost:8080/?imple/path.js",
            "http://localhost:8080/Simple/path.js",
        ),
        ("**/{a,b}.js", "https://localhost:8080/a.js"),
        ("**/{a,b}.js", "https://localhost:8080/b.js"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.jpg"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.jpeg"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.png"),
        ("foo*", "foo.js"),
        (
            "http://localhost:3000/signin-oidc*",
            "http://localhost:3000/signin-oidcnice",
        ),
        (
            "**/three-columns/settings.html?**id=[a-z]**",
            "http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah",
        ),
    ]

    # (glob, url) pairs where the translated glob must NOT match the url.
    should_not_match = [
        ("**/*.css", js_url),
        ("*.js", js_url),
        ("**/{a,b}.js", "https://localhost:8080/c.js"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.css"),
        ("foo*", "foo/bar.js"),
        (
            "http://localhost:3000/signin-oidc*",
            "http://localhost:3000/signin-oidc/foo",
        ),
    ]

    # (glob, regex source) pairs pinning the exact translation of escapes.
    expected_patterns = [
        ("\\?", r"^\?$"),
        ("\\", r"^\\$"),
        ("\\\\", r"^\\$"),
        ("\\[", r"^\[$"),
        ("[a-z]", r"^[a-z]$"),
        ("$^+.\\*()|\\?\\{\\}\\[\\]", r"^\$\^\+\.\*\(\)\|\?\{\}\[\]$"),
    ]

    for glob, url in should_match:
        assert glob_to_regex(glob).match(url)

    for glob, url in should_not_match:
        assert not glob_to_regex(glob).match(url)

    for glob, pattern in expected_patterns:
        assert glob_to_regex(glob) == re.compile(pattern)

tests/async/test_page_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,9 @@ async def handler_with_header_mods(route: Route) -> None:
164164
await page.route("**/*", handler_with_header_mods)
165165

166166
await page.goto(server.EMPTY_PAGE)
167-
async with page.expect_request("/sleep.zzz") as request_info:
167+
with server.expect_request("/sleep.zzz") as server_request_info:
168168
await page.evaluate("() => fetch('/sleep.zzz')")
169-
request = await request_info.value
170-
values.append(request.headers.get("foo"))
169+
values.append(server_request_info.value.getHeader("foo"))
171170
assert values == ["bar", "bar", "bar"]
172171

173172

tests/sync/test_browsercontext_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,9 @@ def handler_with_header_mods(route: Route) -> None:
174174
context.route("**/*", handler_with_header_mods)
175175

176176
page.goto(server.EMPTY_PAGE)
177-
with page.expect_request("/sleep.zzz") as request_info:
177+
with server.expect_request("/sleep.zzz") as server_request_info:
178178
page.evaluate("() => fetch('/sleep.zzz')")
179-
request = request_info.value
180-
values.append(request.headers.get("foo"))
179+
values.append(server_request_info.value.getHeader("foo"))
181180
assert values == ["bar", "bar", "bar"]
182181

183182

tests/sync/test_page_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,9 @@ def handler_with_header_mods(route: Route) -> None:
162162
page.route("**/*", handler_with_header_mods)
163163

164164
page.goto(server.EMPTY_PAGE)
165-
with page.expect_request("/sleep.zzz") as request_info:
165+
with server.expect_request("/sleep.zzz") as server_request_info:
166166
page.evaluate("() => fetch('/sleep.zzz')")
167-
request = request_info.value
168-
_append_with_return_value(values, request.headers.get("foo"))
167+
_append_with_return_value(values, server_request_info.value.getHeader("foo"))
169168
assert values == ["bar", "bar", "bar"]
170169

171170

0 commit comments

Comments
 (0)