Skip to content

Commit 5d3601a

Browse files
authored
Add support for using Package URL (purl) as project input. (#1686)
* Upgrade packageurl-python to latest version #1383
  Signed-off-by: tdruez <tdruez@nexb.com>
* Add make_mock_response to simplify setup in unit test #1383
  Signed-off-by: tdruez <tdruez@nexb.com>
* Add support for fetching Package URLs (fetch_package_url) #1383
  Signed-off-by: tdruez <tdruez@nexb.com>
* Add Package URL placeholder in InputsBaseForm #1383
  Signed-off-by: tdruez <tdruez@nexb.com>
* Add CHANGELOG entry #1383
  Signed-off-by: tdruez <tdruez@nexb.com>
---------
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent d9875ff commit 5d3601a

File tree

9 files changed

+70
-47
lines changed

9 files changed

+70
-47
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Changelog
44
v34.12.0 (unreleased)
55
---------------------
66

7+
- Add support for using Package URL (purl) as project input.
8+
This implementation is based on ``purl2url.get_download_url``.
9+
https://github.com/aboutcode-org/scancode.io/issues/1383
10+
711
- Raise a ``MatchCodeIOException`` when the response from the MatchCode.io service is
812
not valid in ``send_project_json_to_matchcode``.
913
This generally means an issue on the MatchCode.io server side.

scanpipe/forms.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,16 +64,17 @@ class InputsBaseForm(forms.Form):
6464
label="Download URLs",
6565
required=False,
6666
help_text=(
67-
"Provide one or more URLs to download, one per line. "
68-
"Files are fetched at the beginning of the pipeline run execution."
67+
"Enter one or more download URLs, one per line. "
68+
"Files will be fetched when the pipeline starts."
6969
),
7070
widget=forms.Textarea(
7171
attrs={
7272
"class": "textarea is-dynamic",
73-
"rows": 2,
73+
"rows": 3,
7474
"placeholder": (
7575
"https://domain.com/archive.zip\n"
76-
"docker://docker-reference (e.g.: docker://postgres:13)"
76+
"docker://docker-reference (e.g.: docker://postgres:13)\n"
77+
"pkg://type/name@version"
7778
),
7879
},
7980
),

scanpipe/pipes/fetch.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
from commoncode import command
4040
from commoncode.hash import multi_checksums
4141
from commoncode.text import python_safe_name
42+
from packageurl import PackageURL
43+
from packageurl.contrib import purl2url
4244
from plugincode.location_provider import get_location
4345
from requests import auth as request_auth
4446

@@ -356,6 +358,17 @@ def fetch_git_repo(url, to=None):
356358
)
357359

358360

361+
def fetch_package_url(url):
    """
    Fetch the package identified by the Package URL (purl) ``url``.

    The purl is first parsed for validation, then resolved to a download URL
    with ``purl2url.get_download_url``. The result of ``fetch_http`` called on
    that download URL is returned.

    Raise a ``ValueError`` when the purl is not valid or when no download URL
    can be resolved for it.
    """
    # The parsed object is not needed: parsing is only used to reject
    # invalid Package URLs early with a ValueError.
    PackageURL.from_string(url)

    download_url = purl2url.get_download_url(url)
    if not download_url:
        raise ValueError(f"Could not resolve a download URL for {url}.")

    return fetch_http(download_url)
370+
371+
359372
SCHEME_TO_FETCHER_MAPPING = {
360373
"http": fetch_http,
361374
"https": fetch_http,
@@ -371,6 +384,9 @@ def get_fetcher(url):
371384
if url.rstrip("/").endswith(".git"):
372385
return fetch_git_repo
373386

387+
if url.startswith("pkg:"):
388+
return fetch_package_url
389+
374390
# Not using `urlparse(url).scheme` for the scheme as it converts to lower case.
375391
scheme = url.split("://")[0]
376392

scanpipe/tests/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,16 @@ def make_message(project, **data):
101101
)
102102

103103

104+
def make_mock_response(url, content=b"\x00", status_code=200, headers=None):
    """
    Build a ``mock.Mock`` standing in for an HTTP response in tests.

    The ``url``, ``content``, ``status_code``, and ``headers`` attributes are
    set from the arguments. An empty dict is used for ``headers`` when none
    (or an empty value) is provided.
    """
    # Keyword arguments given to Mock() are set as attributes on the mock.
    return mock.Mock(
        url=url,
        content=content,
        status_code=status_code,
        headers=headers or {},
    )
112+
113+
104114
resource_data1 = {
105115
"path": "notice.NOTICE",
106116
"type": "file",

scanpipe/tests/pipes/test_fetch.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from requests import auth as request_auth
3030

3131
from scanpipe.pipes import fetch
32+
from scanpipe.tests import make_mock_response
3233

3334

3435
class ScanPipeFetchPipesTest(TestCase):
@@ -41,6 +42,7 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
4142
git_http_url = "https://github.com/aboutcode-org/scancode.io.git"
4243
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url))
4344
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url + "/"))
45+
self.assertEqual(fetch.fetch_package_url, fetch.get_fetcher("pkg:npm/d3@5.8.0"))
4446

4547
with self.assertRaises(ValueError) as cm:
4648
fetch.get_fetcher("")
@@ -71,28 +73,41 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
7173
def test_scanpipe_pipes_fetch_http(self, mock_get):
7274
url = "https://example.com/filename.zip"
7375

74-
mock_get.return_value = mock.Mock(
75-
content=b"\x00", headers={}, status_code=200, url=url
76-
)
76+
mock_get.return_value = make_mock_response(url=url)
7777
downloaded_file = fetch.fetch_http(url)
7878
self.assertTrue(Path(downloaded_file.directory, "filename.zip").exists())
7979

8080
url_with_spaces = "https://example.com/space%20in%20name.zip"
81-
mock_get.return_value = mock.Mock(
82-
content=b"\x00", headers={}, status_code=200, url=url_with_spaces
83-
)
81+
mock_get.return_value = make_mock_response(url=url_with_spaces)
8482
downloaded_file = fetch.fetch_http(url)
8583
self.assertTrue(Path(downloaded_file.directory, "space in name.zip").exists())
8684

8785
headers = {
8886
"content-disposition": 'attachment; filename="another_name.zip"',
8987
}
90-
mock_get.return_value = mock.Mock(
91-
content=b"\x00", headers=headers, status_code=200, url=url
92-
)
88+
mock_get.return_value = make_mock_response(url=url, headers=headers)
9389
downloaded_file = fetch.fetch_http(url)
9490
self.assertTrue(Path(downloaded_file.directory, "another_name.zip").exists())
9591

92+
@mock.patch("requests.sessions.Session.get")
def test_scanpipe_pipes_fetch_package_url(self, mock_get):
    # Scenario 1: a string that is not a valid purl is rejected.
    invalid_purl = "pkg:not_a_valid_purl"
    with self.assertRaises(ValueError) as cm:
        fetch.fetch_package_url(invalid_purl)
    self.assertEqual(
        f"purl is missing the required type component: '{invalid_purl}'.",
        str(cm.exception),
    )

    # Scenario 2: a valid purl for which no download URL can be resolved.
    unresolvable_purl = "pkg:generic/name@version"
    with self.assertRaises(ValueError) as cm:
        fetch.fetch_package_url(unresolvable_purl)
    self.assertEqual(
        f"Could not resolve a download URL for {unresolvable_purl}.",
        str(cm.exception),
    )

    # Scenario 3: a resolvable purl is downloaded through the mocked session.
    resolvable_purl = "pkg:npm/d3@5.8.0"
    mock_get.return_value = make_mock_response(url="https://exa.com/filename.zip")
    downloaded_file = fetch.fetch_package_url(resolvable_purl)
    expected_path = Path(downloaded_file.directory, "filename.zip")
    self.assertTrue(expected_path.exists())
110+
96111
@mock.patch("scanpipe.pipes.fetch.get_docker_image_platform")
97112
@mock.patch("scanpipe.pipes.fetch._get_skopeo_location")
98113
@mock.patch("scanpipe.pipes.fetch.run_command_safely")
@@ -188,9 +203,7 @@ def test_scanpipe_pipes_fetch_fetch_urls(self, mock_get):
188203
"https://example.com/archive.tar.gz",
189204
]
190205

191-
mock_get.return_value = mock.Mock(
192-
content=b"\x00", headers={}, status_code=200, url="mocked_url"
193-
)
206+
mock_get.return_value = make_mock_response(url="mocked_url")
194207
downloads, errors = fetch.fetch_urls(urls)
195208
self.assertEqual(2, len(downloads))
196209
self.assertEqual(urls[0], downloads[0].uri)

scanpipe/tests/test_commands.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from scanpipe.models import WebhookSubscription
4949
from scanpipe.pipes import flag
5050
from scanpipe.pipes import purldb
51+
from scanpipe.tests import make_mock_response
5152
from scanpipe.tests import make_package
5253
from scanpipe.tests import make_project
5354
from scanpipe.tests import make_resource_file
@@ -963,12 +964,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker(
963964
mock_get_latest_output.return_value = (
964965
self.data / "scancode" / "is-npm-1.0.0_summary.json"
965966
)
966-
mock_download_get.return_value = mock.Mock(
967-
content=b"\x00",
968-
headers={},
969-
status_code=200,
970-
url=download_url,
971-
)
967+
mock_download_get.return_value = make_mock_response(url=download_url)
972968

973969
self.assertFalse(WebhookSubscription.objects.exists())
974970

@@ -1016,12 +1012,7 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_failure(
10161012
"status": f"updated scannable_uri {scannable_uri_uuid} "
10171013
"scan_status to 'failed'"
10181014
}
1019-
mock_download_get.return_value = mock.Mock(
1020-
content=b"\x00",
1021-
headers={},
1022-
status_code=200,
1023-
url=download_url,
1024-
)
1015+
mock_download_get.return_value = make_mock_response(url=download_url)
10251016

10261017
options = [
10271018
"--max-loops",
@@ -1075,18 +1066,8 @@ def test_scanpipe_management_command_purldb_scan_queue_worker_continue_after_fai
10751066
]
10761067

10771068
mock_download_get.side_effect = [
1078-
mock.Mock(
1079-
content=b"\x00",
1080-
headers={},
1081-
status_code=200,
1082-
url=download_url1,
1083-
),
1084-
mock.Mock(
1085-
content=b"\x00",
1086-
headers={},
1087-
status_code=200,
1088-
url=download_url2,
1089-
),
1069+
make_mock_response(url=download_url1),
1070+
make_mock_response(url=download_url2),
10901071
]
10911072

10921073
mock_request_post.side_effect = [

scanpipe/tests/test_models.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
from scanpipe.tests import license_policies_index
7979
from scanpipe.tests import make_dependency
8080
from scanpipe.tests import make_message
81+
from scanpipe.tests import make_mock_response
8182
from scanpipe.tests import make_package
8283
from scanpipe.tests import make_project
8384
from scanpipe.tests import make_resource_directory
@@ -1473,9 +1474,7 @@ def test_scanpipe_input_source_model_delete_file(self):
14731474
@mock.patch("requests.sessions.Session.get")
14741475
def test_scanpipe_input_source_model_fetch(self, mock_get):
14751476
download_url = "https://download.url/file.zip"
1476-
mock_get.return_value = mock.Mock(
1477-
content=b"\x00", headers={}, status_code=200, url=download_url
1478-
)
1477+
mock_get.return_value = make_mock_response(url=download_url)
14791478

14801479
input_source = self.project1.add_input_source(download_url=download_url)
14811480
destination = input_source.fetch()

scanpipe/tests/test_pipelines.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from scanpipe.pipes import scancode
5454
from scanpipe.pipes.input import copy_input
5555
from scanpipe.tests import FIXTURES_REGEN
56+
from scanpipe.tests import make_mock_response
5657
from scanpipe.tests import make_package
5758
from scanpipe.tests import make_project
5859
from scanpipe.tests import package_data1
@@ -226,9 +227,7 @@ def test_scanpipe_pipeline_class_download_missing_inputs(self, mock_get):
226227
self.assertEqual("", run.log)
227228

228229
download_url = "https://download.url/file.zip"
229-
mock_get.return_value = mock.Mock(
230-
content=b"\x00", headers={}, status_code=200, url=download_url
231-
)
230+
mock_get.return_value = make_mock_response(url=download_url)
232231
input_source2 = project1.add_input_source(download_url=download_url)
233232
pipeline.download_missing_inputs()
234233
self.assertIn("Fetching input from https://download.url/file.zip", run.log)

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ install_requires =
7676
extractcode[full]==31.0.0
7777
commoncode==32.2.1
7878
Beautifulsoup4[chardet]==4.13.3
79-
packageurl-python==0.16.0
79+
packageurl-python==0.17.1
8080
# FetchCode
8181
fetchcode-container==1.2.3.210512; sys_platform == "linux"
8282
# Inspectors

0 commit comments

Comments
 (0)