Skip to content

Commit 95f47cb

Browse files
authored
Add URL scheme validation with explicit error messages #1047 (#1085)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 6ce3e30 commit 95f47cb

File tree

7 files changed

+95
-23
lines changed

7 files changed

+95
-23
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ v33.2.0 (unreleased)
4141
- Extract all archives recursively in the `scan_single_package` pipeline.
4242
https://github.com/nexB/scancode.io/issues/1081
4343

44+
- Add URL scheme validation with explicit error messages for input URLs.
45+
https://github.com/nexB/scancode.io/issues/1047
46+
4447
v33.1.0 (2024-02-02)
4548
--------------------
4649

docs/built-in-pipelines.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ Find Vulnerabilities (addon)
5858

5959
.. _pipeline_inspect_elf:
6060

61-
Inspect ELF Binaries
62-
--------------------
61+
Inspect ELF Binaries (addon)
62+
----------------------------
6363
.. autoclass:: scanpipe.pipelines.inspect_elf_binaries.InspectELFBinaries()
6464
:members:
6565
:member-order: bysource

scanpipe/forms.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from taggit.forms import TagWidget
3030

3131
from scanpipe.models import Project
32-
from scanpipe.pipes.fetch import check_urls_availability
32+
from scanpipe.pipes import fetch
3333

3434
scanpipe_app = apps.get_app_config("scanpipe")
3535

@@ -76,6 +76,23 @@ class InputsBaseForm(forms.Form):
7676
class Media:
7777
js = ("add-inputs.js",)
7878

79+
@staticmethod
80+
def validate_scheme(input_urls):
81+
"""
82+
Raise a validation error if some of the `input_urls` have a scheme that is not
83+
supported.
84+
"""
85+
errors = []
86+
87+
for url in input_urls:
88+
try:
89+
fetch.get_fetcher(url)
90+
except ValueError as e:
91+
errors.append(str(e))
92+
93+
if errors:
94+
raise ValidationError("\n".join(errors))
95+
7996
def clean_input_urls(self):
8097
"""
8198
Fetch the `input_urls` and sets the `downloads` objects in the cleaned_data.
@@ -84,8 +101,9 @@ def clean_input_urls(self):
84101
input_urls_str = self.cleaned_data.get("input_urls", "")
85102
input_urls = input_urls_str.split()
86103

87-
errors = check_urls_availability(input_urls)
88-
if errors:
104+
self.validate_scheme(input_urls)
105+
106+
if errors := fetch.check_urls_availability(input_urls):
89107
raise ValidationError("Could not fetch:\n" + "\n".join(errors))
90108

91109
return input_urls

scanpipe/pipes/fetch.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -211,27 +211,28 @@ def get_docker_image_platform(docker_reference):
211211
)
212212

213213

214-
def fetch_docker_image(docker_reference, to=None):
214+
def fetch_docker_image(download_url, to=None):
215215
"""
216-
Fetch a docker image from the provided Docker image `docker_reference`
217-
docker:// reference URL. Return a `download` object.
216+
Fetch a docker image from the provided Docker image `download_url`,
217+
using the "docker://reference" URL syntax.
218+
Return a `Download` object.
218219
219220
Docker references are documented here:
220221
https://github.com/containers/skopeo/blob/0faf16017/docs/skopeo.1.md#image-names
221222
"""
222223
whitelist = r"^docker://[a-zA-Z0-9_.:/@-]+$"
223-
if not re.match(whitelist, docker_reference):
224+
if not re.match(whitelist, download_url):
224225
raise ValueError("Invalid Docker reference.")
225226

226-
name = python_safe_name(docker_reference.replace("docker://", ""))
227-
filename = f"{name}.tar"
227+
reference = download_url.replace("docker://", "")
228+
filename = f"{python_safe_name(reference)}.tar"
228229
download_directory = to or tempfile.mkdtemp()
229230
output_file = Path(download_directory, filename)
230231
target = f"docker-archive:{output_file}"
231232
skopeo_executable = _get_skopeo_location()
232233

233234
platform_args = []
234-
if platform := get_docker_image_platform(docker_reference):
235+
if platform := get_docker_image_platform(download_url):
235236
os, arch, variant = platform
236237
if os:
237238
platform_args.append(f"--override-os={os}")
@@ -245,7 +246,7 @@ def fetch_docker_image(docker_reference, to=None):
245246
"copy",
246247
"--insecure-policy",
247248
*platform_args,
248-
docker_reference,
249+
download_url,
249250
target,
250251
)
251252
logger.info(f"Fetching image with: {cmd_args}")
@@ -255,7 +256,7 @@ def fetch_docker_image(docker_reference, to=None):
255256
checksums = multi_checksums(output_file, ("md5", "sha1"))
256257

257258
return Download(
258-
uri=docker_reference,
259+
uri=download_url,
259260
directory=download_directory,
260261
filename=filename,
261262
path=output_file,
@@ -265,16 +266,30 @@ def fetch_docker_image(docker_reference, to=None):
265266
)
266267

267268

268-
def _get_fetcher(url):
269-
"""Return the fetcher function based on the provided `url`."""
270-
if url.startswith("docker://"):
271-
return fetch_docker_image
272-
return fetch_http
269+
SCHEME_TO_FETCHER_MAPPING = {
270+
"http": fetch_http,
271+
"https": fetch_http,
272+
"docker": fetch_docker_image,
273+
}
274+
275+
276+
def get_fetcher(url):
277+
"""Return the fetcher function based on the provided `url` scheme."""
278+
# Not using `urlparse(url).scheme` for the scheme as it converts to lower case.
279+
scheme = url.split("://")[0]
280+
281+
if fetcher := SCHEME_TO_FETCHER_MAPPING.get(scheme):
282+
return fetcher
283+
284+
error_msg = f"URL scheme '{scheme}' is not supported."
285+
if scheme.lower() in SCHEME_TO_FETCHER_MAPPING:
286+
error_msg += f" Did you mean: '{scheme.lower()}'?"
287+
raise ValueError(error_msg)
273288

274289

275290
def fetch_url(url):
276291
"""Fetch provided `url` and returns the result as a `Download` object."""
277-
fetcher = _get_fetcher(url)
292+
fetcher = get_fetcher(url)
278293
logger.info(f'Fetching "{url}" using {fetcher.__name__}')
279294
downloaded = fetcher(url)
280295
return downloaded

scanpipe/templates/scanpipe/project_form.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ <h2 class="subtitle mb-0 mb-4">
8585
{{ form.media }}
8686
</div>
8787

88-
<div class="column has-background-light has-border-radius">
88+
<div class="column has-background-light has-border-radius mb-3">
8989
<h3 class="subtitle mb-3">Pipelines:</h3>
9090
{% for pipeline_name, pipeline_info in pipelines.items %}
9191
<div {% if not forloop.last %}class="mb-3"{% endif %}>

scanpipe/tests/pipes/test_fetch.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,31 @@
3131
class ScanPipeFetchPipesTest(TestCase):
3232
data_location = Path(__file__).parent.parent / "data"
3333

34+
def test_scanpipe_pipes_fetch_get_fetcher(self):
35+
self.assertEqual(fetch.fetch_http, fetch.get_fetcher("http://a.b/f.z"))
36+
self.assertEqual(fetch.fetch_http, fetch.get_fetcher("https://a.b/f.z"))
37+
self.assertEqual(fetch.fetch_docker_image, fetch.get_fetcher("docker://image"))
38+
39+
with self.assertRaises(ValueError) as cm:
40+
fetch.get_fetcher("")
41+
expected = "URL scheme '' is not supported."
42+
self.assertEqual(expected, str(cm.exception))
43+
44+
with self.assertRaises(ValueError) as cm:
45+
fetch.get_fetcher("abcd://a.b/f.z")
46+
expected = "URL scheme 'abcd' is not supported."
47+
self.assertEqual(expected, str(cm.exception))
48+
49+
with self.assertRaises(ValueError) as cm:
50+
fetch.get_fetcher("Docker://image")
51+
expected = "URL scheme 'Docker' is not supported. Did you mean: 'docker'?"
52+
self.assertEqual(expected, str(cm.exception))
53+
54+
with self.assertRaises(ValueError) as cm:
55+
fetch.get_fetcher("DOCKER://image")
56+
expected = "URL scheme 'DOCKER' is not supported. Did you mean: 'docker'?"
57+
self.assertEqual(expected, str(cm.exception))
58+
3459
@mock.patch("requests.get")
3560
def test_scanpipe_pipes_fetch_http(self, mock_get):
3661
url = "https://example.com/filename.zip"
@@ -63,8 +88,12 @@ def test_scanpipe_pipes_fetch_http(self, mock_get):
6388
def test_scanpipe_pipes_fetch_docker_image(
6489
self, mock_run_command_safely, mock_skopeo, mock_platform
6590
):
66-
url = "docker://debian:10.9"
91+
with self.assertRaises(ValueError) as cm:
92+
fetch.fetch_docker_image("Docker://debian")
93+
expected = "Invalid Docker reference."
94+
self.assertEqual(expected, str(cm.exception))
6795

96+
url = "docker://debian:10.9"
6897
mock_platform.return_value = "linux", "amd64", ""
6998
mock_skopeo.return_value = "skopeo"
7099
mock_run_command_safely.side_effect = Exception

scanpipe/tests/test_forms.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,16 @@ def setUp(self):
4141
@mock.patch("requests.head")
4242
def test_scanpipe_forms_inputs_base_form_input_urls(self, mock_head):
4343
data = {
44-
"input_urls": "https://example.com/archive.zip",
44+
"input_urls": "Docker://debian",
4545
}
46+
form = InputsBaseForm(data=data)
47+
self.assertFalse(form.is_valid())
48+
error_msg = "URL scheme 'Docker' is not supported. Did you mean: 'docker'?"
49+
self.assertEqual({"input_urls": [error_msg]}, form.errors)
4650

51+
data = {
52+
"input_urls": "https://example.com/archive.zip",
53+
}
4754
mock_head.side_effect = requests.exceptions.RequestException
4855
form = InputsBaseForm(data=data)
4956
self.assertFalse(form.is_valid())

0 commit comments

Comments
 (0)