Skip to content

Commit 6bc1a10

Browse files
authored
Parser: Improvements, tests (#384)
Parser: Fix port detection in svn and hg; add port detection to git; use rev to parse branch, tag, and references with git; GitBaseURL and GitPipURL
2 parents 0779804 + 9f87311 commit 6bc1a10

File tree

10 files changed

+304
-147
lines changed

10 files changed

+304
-147
lines changed

CHANGES

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ $ pip install --user --upgrade --pre libvcs
1515

1616
### What's new
1717

18-
- **Parser**: Experimental VCS URL parsing added ({issue}`376`, {issue}`381`):
18+
- **Parser**: Experimental VCS URL parsing added ({issue}`376`, {issue}`381`, {issue}`384`):
1919

2020
VCS Parsers return {func}`dataclasses.dataclass` instances. The new tools support validation,
2121
parsing, mutating and exporting into URLs consumable by the VCS.
@@ -28,9 +28,16 @@ $ pip install --user --upgrade --pre libvcs
2828

2929
- {mod}`libvcs.parse.git`
3030

31-
- {class}`~libvcs.parse.git.GitURL` - Parse git URLs
32-
- {meth}`~libvcs.parse.git.GitURL.is_valid`
33-
- {meth}`~libvcs.parse.git.GitURL.to_url` - export `git clone`-compatible URL
31+
- {class}`~libvcs.parse.git.GitBaseURL` - Parse git URLs, `git(1)` compatible
32+
33+
- {meth}`~libvcs.parse.git.GitBaseURL.is_valid`
34+
- {meth}`~libvcs.parse.git.GitBaseURL.to_url` - export `git clone`-compatible URL
35+
36+
- {class}`~libvcs.parse.git.GitPipURL` - Pip URLs, {meth}`~libvcs.parse.git.GitPipURL.is_valid`,
37+
{meth}`~libvcs.parse.git.GitPipURL.to_url`
38+
39+
- {class}`~libvcs.parse.git.GitURL` - Compatibility focused,
40+
{meth}`~libvcs.parse.git.GitURL.is_valid`, {meth}`~libvcs.parse.git.GitURL.to_url`
3441

3542
- {mod}`libvcs.parse.hg`
3643

docs/parse/git.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ For git, aka `git(1)`.
55
```{eval-rst}
66
.. autoapimodule:: libvcs.parse.git
77
:members:
8+
:inherited-members:
89
:exclude-members: StrOrBytesPath, StrPath, SkipDefaultFieldsReprMixin,
910
Matcher, MatcherRegistry, URLProtocol
1011
```

libvcs/parse/base.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,28 @@ class MatcherRegistry(SkipDefaultFieldsReprMixin):
3838

3939
def register(self, cls: Matcher) -> None:
4040
"""
41-
>>> from libvcs.parse.git import GitURL
41+
42+
.. currentmodule:: libvcs.parse.git
43+
44+
>>> from libvcs.parse.git import GitURL, GitBaseURL
45+
46+
:class:`GitBaseURL` - the ``git(1)`` compliant parser - won't accept a pip-style URL:
47+
48+
>>> GitBaseURL.is_valid(url="git+ssh://git@github.com/tony/AlgoXY.git")
49+
False
50+
51+
:class:`GitURL` - the "batteries-included" parser - can do it:
4252
4353
>>> GitURL.is_valid(url="git+ssh://git@github.com/tony/AlgoXY.git")
54+
True
55+
56+
But what if you wanted to do ``github:org/repo``?
57+
58+
>>> GitURL.is_valid(url="github:org/repo")
4459
False
4560
61+
**Extending matching capability:**
62+
4663
>>> class GitHubPrefix(Matcher):
4764
... label = 'gh-prefix'
4865
... description ='Matches prefixes like github:org/repo'
@@ -95,9 +112,15 @@ def register(self, cls: Matcher) -> None:
95112
96113
git URLs + pip-style git URLs:
97114
115+
This is already in :class:`GitURL` via :data:`PIP_DEFAULT_MATCHERS`. For the
116+
sake of showing how extensibility works, here is a recreation based on
117+
:class:`GitBaseURL`:
118+
119+
>>> from libvcs.parse.git import GitBaseURL
120+
98121
>>> from libvcs.parse.git import DEFAULT_MATCHERS, PIP_DEFAULT_MATCHERS
99122
100-
>>> class GitURLWithPip(GitURL):
123+
>>> class GitURLWithPip(GitBaseURL):
101124
... matchers = MatcherRegistry = MatcherRegistry(
102125
... _matchers={m.label: m for m in [*DEFAULT_MATCHERS, *PIP_DEFAULT_MATCHERS]}
103126
... )
@@ -108,7 +131,8 @@ def register(self, cls: Matcher) -> None:
108131
>>> GitURLWithPip(url="git+ssh://git@github.com/tony/AlgoXY.git")
109132
GitURLWithPip(url=git+ssh://git@github.com/tony/AlgoXY.git,
110133
scheme=git+ssh,
111-
hostname=git@github.com,
134+
user=git,
135+
hostname=github.com,
112136
path=tony/AlgoXY,
113137
suffix=.git,
114138
matcher=pip-url)

libvcs/parse/git.py

Lines changed: 115 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
"""This module is an all-in-one parser and validator for Git URLs.
22
33
- Detection: :meth:`GitURL.is_valid()`
4-
- Parse: :class:`GitURL`
4+
- Parse:
55
66
compare to :class:`urllib.parse.ParseResult`
77
8-
- Output ``git(1)`` URL: :meth:`GitURL.to_url()`
8+
- Compatibility focused: :class:`GitURL`: Will work with ``git(1)`` as well as
9+
``pip(1)`` style URLs
10+
11+
- Output ``git(1)`` URL: :meth:`GitURL.to_url()`
12+
- Strict ``git(1)`` compatibility: :class:`GitBaseURL`.
13+
14+
- Output ``git(1)`` URL: :meth:`GitBaseURL.to_url()`
915
- Extendable via :class:`~libvcs.parse.base.MatcherRegistry`,
1016
:class:`~libvcs.parse.base.Matcher`
1117
"""
@@ -23,7 +29,7 @@
2329
# We modified it to have groupings
2430
SCP_REGEX = r"""
2531
# Optional user, e.g. 'git@'
26-
(?P<user>(\w+))?@
32+
((?P<user>\w+)@)?
2733
# Server, e.g. 'github.com'.
2834
(?P<hostname>([^/:]+)):
2935
# The server-side path. e.g. 'user/project.git'. Must start with an
@@ -33,10 +39,12 @@
3339
"""
3440

3541
RE_PATH = r"""
42+
((?P<user>\w+)@)?
3643
(?P<hostname>([^/:]+))
44+
(:(?P<port>\d{1,5}))?
3745
(?P<separator>[:,/])?
3846
(?P<path>
39-
(\w[^:.]*) # cut the path at . to negate .git
47+
(\w[^:.@]*) # cut the path at . to negate .git, @ from pip
4048
)?
4149
"""
4250

@@ -100,27 +108,31 @@
100108
)
101109
"""
102110

103-
RE_PIP_SCHEME_WITH_HTTP = r"""
111+
RE_PIP_SCP_SCHEME = r"""
104112
(?P<scheme>
105113
(
106114
git\+ssh|
107-
git\+https|
108-
git\+http|
109115
git\+file
110116
)
111117
)
112118
"""
113119

120+
RE_PIP_REV = r"""
121+
(@(?P<rev>.*))
122+
"""
123+
124+
114125
PIP_DEFAULT_MATCHERS: list[Matcher] = [
115126
Matcher(
116127
label="pip-url",
117128
description="pip-style git URL",
118129
pattern=re.compile(
119130
rf"""
120-
{RE_PIP_SCHEME_WITH_HTTP}
131+
{RE_PIP_SCHEME}
121132
://
122133
{RE_PATH}
123134
{RE_SUFFIX}?
135+
{RE_PIP_REV}?
124136
""",
125137
re.VERBOSE,
126138
),
@@ -130,9 +142,10 @@
130142
description="pip-style git ssh/scp URL",
131143
pattern=re.compile(
132144
rf"""
133-
{RE_PIP_SCHEME}
145+
{RE_PIP_SCP_SCHEME}
134146
{SCP_REGEX}?
135-
{RE_SUFFIX}
147+
{RE_SUFFIX}?
148+
{RE_PIP_REV}?
136149
""",
137150
re.VERBOSE,
138151
),
@@ -142,9 +155,10 @@
142155
label="pip-file-url",
143156
description="pip-style git+file:// URL",
144157
pattern=re.compile(
145-
r"""
158+
rf"""
146159
(?P<scheme>git\+file)://
147-
(?P<path>.*)
160+
(?P<path>[^@]*)
161+
{RE_PIP_REV}?
148162
""",
149163
re.VERBOSE,
150164
),
@@ -193,7 +207,7 @@
193207

194208

195209
@dataclasses.dataclass(repr=False)
196-
class GitURL(URLProtocol, SkipDefaultFieldsReprMixin):
210+
class GitBaseURL(URLProtocol, SkipDefaultFieldsReprMixin):
197211
"""Git repository location. Parses URLs on initialization.
198212
199213
Examples
@@ -216,9 +230,9 @@ class GitURL(URLProtocol, SkipDefaultFieldsReprMixin):
216230
217231
>>> GitURL(url='git@github.com:vcs-python/libvcs.git')
218232
GitURL(url=git@github.com:vcs-python/libvcs.git,
233+
user=git,
219234
hostname=github.com,
220235
path=vcs-python/libvcs,
221-
user=git,
222236
suffix=.git,
223237
matcher=core-git-scp)
224238
@@ -229,28 +243,18 @@ class GitURL(URLProtocol, SkipDefaultFieldsReprMixin):
229243
----------
230244
matcher : str
231245
name of the :class:`~libvcs.parse.base.Matcher`
232-
233-
branch : str, optional
234-
Default URL parsers don't output these,
235-
can be added by extending or passing manually
236246
"""
237247

238248
url: str
239249
scheme: Optional[str] = None
250+
user: Optional[str] = None
240251
hostname: Optional[str] = None
252+
port: Optional[int] = None
241253
path: Optional[str] = None
242-
user: Optional[str] = None
243254

244255
# Decoration
245256
suffix: Optional[str] = None
246257

247-
#
248-
# commit-ish: tag, branch, ref, revision
249-
#
250-
ref: Optional[str] = None
251-
branch: Optional[str] = None
252-
tag: Optional[str] = None
253-
254258
matcher: Optional[str] = None
255259
matchers = MatcherRegistry = MatcherRegistry(
256260
_matchers={m.label: m for m in DEFAULT_MATCHERS}
@@ -298,9 +302,9 @@ def to_url(self) -> str:
298302
299303
>>> git_location
300304
GitURL(url=git@github.com:vcs-python/libvcs.git,
305+
user=git,
301306
hostname=github.com,
302307
path=vcs-python/libvcs,
303-
user=git,
304308
suffix=.git,
305309
matcher=core-git-scp)
306310
@@ -333,3 +337,87 @@ def to_url(self) -> str:
333337
parts.append(self.suffix)
334338

335339
return "".join(part for part in parts if isinstance(part, str))
340+
341+
342+
@dataclasses.dataclass(repr=False)
343+
class GitPipURL(GitBaseURL, URLProtocol, SkipDefaultFieldsReprMixin):
344+
"""Supports pip git URLs."""
345+
346+
# commit-ish (rev): tag, branch, ref
347+
rev: Optional[str] = None
348+
349+
matchers = MatcherRegistry = MatcherRegistry(
350+
_matchers={m.label: m for m in PIP_DEFAULT_MATCHERS}
351+
)
352+
353+
def to_url(self) -> str:
354+
"""Exports a pip-compliant URL.
355+
356+
Examples
357+
--------
358+
359+
>>> git_location = GitPipURL(
360+
... url='git+ssh://git@bitbucket.example.com:7999/PROJ/repo.git'
361+
... )
362+
363+
>>> git_location
364+
GitPipURL(url=git+ssh://git@bitbucket.example.com:7999/PROJ/repo.git,
365+
scheme=git+ssh,
366+
user=git,
367+
hostname=bitbucket.example.com,
368+
port=7999,
369+
path=PROJ/repo,
370+
suffix=.git,
371+
matcher=pip-url)
372+
373+
>>> git_location.path = 'libvcs/vcspull'
374+
375+
>>> git_location.to_url()
376+
'git+ssh://bitbucket.example.com/libvcs/vcspull.git'
377+
378+
It also accepts revisions, e.g. branch, tag, ref:
379+
380+
>>> git_location = GitPipURL(
381+
... url='git+https://github.com/vcs-python/libvcs.git@v0.10.0'
382+
... )
383+
384+
>>> git_location
385+
GitPipURL(url=git+https://github.com/vcs-python/libvcs.git@v0.10.0,
386+
scheme=git+https,
387+
hostname=github.com,
388+
path=vcs-python/libvcs,
389+
suffix=.git,
390+
matcher=pip-url,
391+
rev=v0.10.0)
392+
393+
>>> git_location.path = 'libvcs/vcspull'
394+
395+
>>> git_location.to_url()
396+
'git+https://github.com/libvcs/vcspull.git@v0.10.0'
397+
"""
398+
url = super().to_url()
399+
400+
if self.rev:
401+
url = f"{url}@{self.rev}"
402+
403+
return url
404+
405+
406+
@dataclasses.dataclass(repr=False)
407+
class GitURL(GitPipURL, GitBaseURL, URLProtocol, SkipDefaultFieldsReprMixin):
408+
"""Batteries included URL Parser. Supports git(1) and pip URLs.
409+
410+
**Ancestors (MRO)**
411+
This URL parser inherits methods and attributes from the following parsers:
412+
413+
- :class:`GitPipURL`
414+
415+
- :meth:`GitPipURL.to_url`
416+
- :class:`GitBaseURL`
417+
418+
- :meth:`GitBaseURL.to_url`
419+
"""
420+
421+
matchers = MatcherRegistry = MatcherRegistry(
422+
_matchers={m.label: m for m in [*DEFAULT_MATCHERS, *PIP_DEFAULT_MATCHERS]}
423+
)

libvcs/parse/hg.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
from .base import Matcher, MatcherRegistry, URLProtocol
2727

2828
RE_PATH = r"""
29-
((?P<user>.*)@)?
30-
(?P<hostname>([^/:]+))
31-
(:(?P<port>\d{1,4}))?
29+
((?P<user>\w+)@)?
30+
(?P<hostname>([^/:@]+))
31+
(:(?P<port>\d{1,5}))?
3232
(?P<separator>/)?
3333
(?P<path>
3434
/?(\w[^:.]*)

libvcs/parse/svn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
RE_PATH = r"""
3030
((?P<user>.*)@)?
3131
(?P<hostname>([^/:]+))
32-
(:(?P<port>\d{1,4}))?
32+
(:(?P<port>\d{1,5}))?
3333
(?P<separator>/)?
3434
(?P<path>
3535
(\w[^:.]*)

tests/data/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)