1
1
import dataclasses
2
- from typing import Iterator , Pattern , Protocol
2
+ from typing import Iterator , Optional , Pattern , Protocol
3
3
4
4
from libvcs ._internal .dataclasses import SkipDefaultFieldsReprMixin
5
5
@@ -13,7 +13,7 @@ def __init__(self, url: str):
13
13
def to_url (self ) -> str :
14
14
...
15
15
16
- def is_valid (self , url : str ) -> bool :
16
+ def is_valid (self , url : str , is_explicit : Optional [ bool ] = None ) -> bool :
17
17
...
18
18
19
19
@@ -28,6 +28,8 @@ class Matcher(SkipDefaultFieldsReprMixin):
28
28
pattern : Pattern
29
29
"""Regex pattern"""
30
30
pattern_defaults : dict = dataclasses .field (default_factory = dict )
31
+ """Is the match unambiguous with other VCS systems? e.g. git+ prefix"""
32
+ is_explicit : bool = False
31
33
32
34
33
35
@dataclasses .dataclass (repr = False )
@@ -37,10 +39,11 @@ class MatcherRegistry(SkipDefaultFieldsReprMixin):
37
39
_matchers : dict [str , Matcher ] = dataclasses .field (default_factory = dict )
38
40
39
41
def register (self , cls : Matcher ) -> None :
40
- """
42
+ r """
41
43
42
44
.. currentmodule:: libvcs.parse.git
43
45
46
+ >>> from dataclasses import dataclass
44
47
>>> from libvcs.parse.git import GitURL, GitBaseURL
45
48
46
49
:class:`GitBaseURL` - the ``git(1)`` compliant parser - won't accept a pip-style URL:
@@ -56,30 +59,69 @@ def register(self, cls: Matcher) -> None:
56
59
But what if you wanted to do ``github:org/repo``?
57
60
58
61
>>> GitURL.is_valid(url="github:org/repo")
59
- False
62
+ True
63
+
64
+ That actually works, but look: it is caught by git's standard scp-style regex:
65
+
66
+ >>> GitURL(url="github:org/repo")
67
+ GitURL(url=github:org/repo,
68
+ hostname=github,
69
+ path=org/repo,
70
+ matcher=core-git-scp)
71
+
72
+ We need something more specific. What do we do?
60
73
61
74
**Extending matching capability:**
62
75
63
76
>>> class GitHubPrefix(Matcher):
64
77
... label = 'gh-prefix'
65
78
... description ='Matches prefixes like github:org/repo'
66
- ... pattern = r'^github:(?P<path>) '
79
+ ... pattern = r'^github:(?P<path>.*)$ '
67
80
... pattern_defaults = {
68
81
... 'hostname': 'github.com',
69
82
... 'scheme': 'https'
70
83
... }
84
+ ... # We know it's git, not any other VCS
85
+ ... is_explicit = True
71
86
72
- >>> class GitHubLocation(GitURL):
73
- ... matchers = MatcherRegistry = MatcherRegistry(
87
+ >>> @dataclasses.dataclass(repr=False)
88
+ ... class GitHubURL(GitURL):
89
+ ... matchers: MatcherRegistry = MatcherRegistry(
74
90
... _matchers={'github_prefix': GitHubPrefix}
75
91
... )
76
92
77
- >>> GitHubLocation.is_valid(url='github:vcs-python/libvcs')
93
+ >>> GitHubURL.is_valid(url='github:vcs-python/libvcs')
94
+ True
95
+
96
+ >>> GitHubURL.is_valid(url='github:vcs-python/libvcs', is_explicit=True)
78
97
True
79
98
80
- >>> GitHubLocation.is_valid(url='gitlab:vcs-python/libvcs')
99
+ Notice how ``pattern_defaults`` neatly fills the values for us.
100
+
101
+ >>> GitHubURL(url='github:vcs-python/libvcs')
102
+ GitHubURL(url=github:vcs-python/libvcs,
103
+ scheme=https,
104
+ hostname=github.com,
105
+ path=vcs-python/libvcs,
106
+ matcher=gh-prefix)
107
+
108
+ >>> GitHubURL(url='github:vcs-python/libvcs').to_url()
109
+ 'https://github.com/vcs-python/libvcs'
110
+
111
+ >>> GitHubURL.is_valid(url='gitlab:vcs-python/libvcs')
81
112
False
82
113
114
+ ``GitHubURL`` sees this as invalid since it only has one matcher,
115
+ ``GitHubPrefix``.
116
+
117
+ >>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
118
+ True
119
+
120
+ Same story: it gets caught by ``git(1)``'s own liberal scp-style URL matcher:
121
+
122
+ >>> GitURL(url='gitlab:vcs-python/libvcs').matcher
123
+ 'core-git-scp'
124
+
83
125
>>> class GitLabPrefix(Matcher):
84
126
... label = 'gl-prefix'
85
127
... description ='Matches prefixes like gitlab:org/repo'
@@ -92,25 +134,33 @@ def register(self, cls: Matcher) -> None:
92
134
93
135
Option 1: Create a brand new matcher
94
136
95
- >>> class GitLabLocation(GitURL):
96
- ... matchers = MatcherRegistry = MatcherRegistry(
97
- ... _matchers={'gitlab_prefix': GitLabPrefix}
98
- ... )
137
+ >>> @dataclasses.dataclass(repr=False)
138
+ ... class GitLabURL(GitURL):
139
+ ... matchers: MatcherRegistry = MatcherRegistry(
140
+ ... _matchers={'gitlab_prefix': GitLabPrefix}
141
+ ... )
99
142
100
- >>> GitLabLocation .is_valid(url='gitlab:vcs-python/libvcs')
143
+ >>> GitLabURL .is_valid(url='gitlab:vcs-python/libvcs')
101
144
True
102
145
103
146
Option 2 (global, everywhere): Add to the global :class:`GitURL`:
104
147
105
148
>>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
106
- False
149
+ True
150
+
151
+ Are we home free, though? Remember our issue with vague matches.
152
+
153
+ >>> GitURL(url='gitlab:vcs-python/libvcs').matcher
154
+ 'core-git-scp'
155
+
156
+ Register:
107
157
108
158
>>> GitURL.matchers.register(GitLabPrefix)
109
159
110
160
>>> GitURL.is_valid(url='gitlab:vcs-python/libvcs')
111
161
True
112
162
113
- git URLs + pip-style git URLs:
163
+ **Example: git URLs + pip-style git URLs:**
114
164
115
165
This is already in :class:`GitURL` via :data:`PIP_DEFAULT_MATCHERS`. For the
116
166
sake of showing how extensibility works, here is a recreation based on
@@ -120,8 +170,9 @@ def register(self, cls: Matcher) -> None:
120
170
121
171
>>> from libvcs.parse.git import DEFAULT_MATCHERS, PIP_DEFAULT_MATCHERS
122
172
123
- >>> class GitURLWithPip(GitBaseURL):
124
- ... matchers = MatcherRegistry = MatcherRegistry(
173
+ >>> @dataclasses.dataclass(repr=False)
174
+ ... class GitURLWithPip(GitBaseURL):
175
+ ... matchers: MatcherRegistry = MatcherRegistry(
125
176
... _matchers={m.label: m for m in [*DEFAULT_MATCHERS, *PIP_DEFAULT_MATCHERS]}
126
177
... )
127
178
0 commit comments