Skip to content

Commit e1641fc

Browse files
authored
Parser: Support explicit matchers, touchups, critical bug fixes (#386)
- Git scp-style URLs: Fix detection of scp-style URLs with `.git` prefixes - Fix critical bug where `matchers` were being applied as variables, not annotations - Fix `pattern_defaults` - Base tests: Rename `Location` to `URL` - Tests and modules: Rename `_location` to `_url`
2 parents 0f2d30b + efe929f commit e1641fc

File tree

8 files changed

+335
-83
lines changed

8 files changed

+335
-83
lines changed

CHANGES

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ $ pip install --user --upgrade --pre libvcs
1616
### What's new
1717

1818
- New and improved logo
19-
- **Parser**: Experimental VCS URL parsing added ({issue}`376`, {issue}`381`, {issue}`384`):
19+
- **Parser**: Experimental VCS URL parsing added ({issue}`376`, {issue}`381`, {issue}`384`,
20+
{issue}`386`):
2021

2122
VCS Parsers return {func}`dataclasses.dataclass` instances. The new tools support validation,
2223
parsing, mutating and exporting into URLs consumable by the VCS.

libvcs/parse/base.py

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import dataclasses
2-
from typing import Iterator, Pattern, Protocol
2+
from typing import Iterator, Optional, Pattern, Protocol
33

44
from libvcs._internal.dataclasses import SkipDefaultFieldsReprMixin
55

@@ -13,7 +13,7 @@ def __init__(self, url: str):
1313
def to_url(self) -> str:
1414
...
1515

16-
def is_valid(self, url: str) -> bool:
16+
def is_valid(self, url: str, is_explicit: Optional[bool] = None) -> bool:
1717
...
1818

1919

@@ -28,6 +28,8 @@ class Matcher(SkipDefaultFieldsReprMixin):
2828
pattern: Pattern
2929
"""Regex pattern"""
3030
pattern_defaults: dict = dataclasses.field(default_factory=dict)
31+
"""Is the match unambiguous with other VCS systems? e.g. git+ prefix"""
32+
is_explicit: bool = False
3133

3234

3335
@dataclasses.dataclass(repr=False)
@@ -37,10 +39,11 @@ class MatcherRegistry(SkipDefaultFieldsReprMixin):
3739
_matchers: dict[str, Matcher] = dataclasses.field(default_factory=dict)
3840

3941
def register(self, cls: Matcher) -> None:
40-
"""
42+
r"""
4143
4244
.. currentmodule:: libvcs.parse.git
4345
46+
>>> from dataclasses import dataclass
4447
>>> from libvcs.parse.git import GitURL, GitBaseURL
4548
4649
:class:`GitBaseURL` - the ``git(1)`` compliant parser - won't accept a pip-style URL:
@@ -56,30 +59,69 @@ def register(self, cls: Matcher) -> None:
5659
But what if you wanted to do ``github:org/repo``?
5760
5861
>>> GitURL.is_valid(url="github:org/repo")
59-
False
62+
True
63+
64+
That actually works, but look, it's caught in git's standard SCP regex:
65+
66+
>>> GitURL(url="github:org/repo")
67+
GitURL(url=github:org/repo,
68+
hostname=github,
69+
path=org/repo,
70+
matcher=core-git-scp)
71+
72+
We need something more specific. What do we do?
6073
6174
**Extending matching capability:**
6275
6376
>>> class GitHubPrefix(Matcher):
6477
... label = 'gh-prefix'
6578
... description ='Matches prefixes like github:org/repo'
66-
... pattern = r'^github:(?P<path>)'
79+
... pattern = r'^github:(?P<path>.*)$'
6780
... pattern_defaults = {
6881
... 'hostname': 'github.com',
6982
... 'scheme': 'https'
7083
... }
84+
... # We know it's git, not any other VCS
85+
... is_explicit = True
7186
72-
>>> class GitHubLocation(GitURL):
73-
... matchers = MatcherRegistry = MatcherRegistry(
87+
>>> @dataclasses.dataclass(repr=False)
88+
... class GitHubURL(GitURL):
89+
... matchers: MatcherRegistry = MatcherRegistry(
7490
... _matchers={'github_prefix': GitHubPrefix}
7591
... )
7692
77-
>>> GitHubLocation.is_valid(url='github:vcs-python/libvcs')
93+
>>> GitHubURL.is_valid(url='github:vcs-python/libvcs')
94+
True
95+
96+
>>> GitHubURL.is_valid(url='github:vcs-python/libvcs', is_explicit=True)
7897
True
7998
80-
>>> GitHubLocation.is_valid(url='gitlab:vcs-python/libvcs')
99+
Notice how ``pattern_defaults`` neatly fills the values for us.
100+
101+
>>> GitHubURL(url='github:vcs-python/libvcs')
102+
GitHubURL(url=github:vcs-python/libvcs,
103+
scheme=https,
104+
hostname=github.com,
105+
path=vcs-python/libvcs,
106+
matcher=gh-prefix)
107+
108+
>>> GitHubURL(url='github:vcs-python/libvcs').to_url()
109+
'https://github.com/vcs-python/libvcs'
110+
111+
>>> GitHubURL.is_valid(url='gitlab:vcs-python/libvcs')
81112
False
82113
114+
`GitHubURL` sees this as invalid since it only has one matcher,
115+
`GitHubPrefix`.
116+
117+
>>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
118+
True
119+
120+
Same story, getting caught in ``git(1)``'s own liberal scp-style URL:
121+
122+
>>> GitURL(url='gitlab:vcs-python/libvcs').matcher
123+
'core-git-scp'
124+
83125
>>> class GitLabPrefix(Matcher):
84126
... label = 'gl-prefix'
85127
... description ='Matches prefixes like gitlab:org/repo'
@@ -92,25 +134,33 @@ def register(self, cls: Matcher) -> None:
92134
93135
Option 1: Create a brand new matcher
94136
95-
>>> class GitLabLocation(GitURL):
96-
... matchers = MatcherRegistry = MatcherRegistry(
97-
... _matchers={'gitlab_prefix': GitLabPrefix}
98-
... )
137+
>>> @dataclasses.dataclass(repr=False)
138+
... class GitLabURL(GitURL):
139+
... matchers: MatcherRegistry = MatcherRegistry(
140+
... _matchers={'gitlab_prefix': GitLabPrefix}
141+
... )
99142
100-
>>> GitLabLocation.is_valid(url='gitlab:vcs-python/libvcs')
143+
>>> GitLabURL.is_valid(url='gitlab:vcs-python/libvcs')
101144
True
102145
103146
Option 2 (global, everywhere): Add to the global :class:`GitURL`:
104147
105148
>>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
106-
False
149+
True
150+
151+
Are we home free, though? Remember our issue with vague matches.
152+
153+
>>> GitURL(url='gitlab:vcs-python/libvcs').matcher
154+
'core-git-scp'
155+
156+
Register:
107157
108158
>>> GitURL.matchers.register(GitLabPrefix)
109159
110160
>>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
111161
True
112162
113-
git URLs + pip-style git URLs:
163+
**Example: git URLs + pip-style git URLs:**
114164
115165
This is already in :class:`GitURL` via :data:`PIP_DEFAULT_MATCHERS`. For the
116166
sake of showing how extensibility works, here is a recreation based on
@@ -120,8 +170,9 @@ def register(self, cls: Matcher) -> None:
120170
121171
>>> from libvcs.parse.git import DEFAULT_MATCHERS, PIP_DEFAULT_MATCHERS
122172
123-
>>> class GitURLWithPip(GitBaseURL):
124-
... matchers = MatcherRegistry = MatcherRegistry(
173+
>>> @dataclasses.dataclass(repr=False)
174+
... class GitURLWithPip(GitBaseURL):
175+
... matchers: MatcherRegistry = MatcherRegistry(
125176
... _matchers={m.label: m for m in [*DEFAULT_MATCHERS, *PIP_DEFAULT_MATCHERS]}
126177
... )
127178

0 commit comments

Comments
 (0)