Skip to content

Commit 2f2c0e3

Browse files
authored
Add support for "tagging" URL inputs using #<fragment> #708 (#1062)
* Add support for "tagging" URL inputs using #<fragment> #708 Signed-off-by: tdruez <tdruez@nexb.com> * Add support for providing multiple URLs in a single str in API #708 Signed-off-by: tdruez <tdruez@nexb.com> --------- Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 71f33c7 commit 2f2c0e3

File tree

8 files changed

+89
-2
lines changed

8 files changed

+89
-2
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ Unreleased
2424
- Improve the inspect_manifest pipeline to accept archives as inputs.
2525
https://github.com/nexB/scancode.io/issues/1034
2626

27+
- Add support for "tagging" download URL inputs using the "#<fragment>" section of the
28+
URL.
29+
This feature is particularly useful in the map_deploy_to_develop pipeline when
30+
download URLs are utilized as inputs. Tags such as "from" and "to" can be specified
31+
by adding "#from" or "#to" fragments at the end of the download URLs.
32+
https://github.com/nexB/scancode.io/issues/708
33+
2734
v33.0.0 (2024-01-16)
2835
--------------------
2936

scanpipe/api/serializers.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class StrListField(serializers.ListField):
9797

9898
def to_internal_value(self, data):
9999
if isinstance(data, str):
100-
data = [data]
100+
data = data.split()
101101
return super().to_internal_value(data)
102102

103103

@@ -250,6 +250,10 @@ def get_codebase_relations_summary(self, project):
250250
queryset = project.codebaserelations.all()
251251
return count_group_by(queryset, "map_type")
252252

253+
def validate_input_urls(self, value):
254+
"""Add support for providing multiple URLs in a single string."""
255+
return [url for entry in value for url in entry.split()]
256+
253257
def create(self, validated_data):
254258
"""
255259
Create a new `project` with `upload_file` and `pipeline` as optional.

scanpipe/models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from operator import itemgetter
3333
from pathlib import Path
3434
from traceback import format_tb
35+
from urllib.parse import urlparse
3536

3637
from django.apps import apps
3738
from django.conf import settings
@@ -983,11 +984,18 @@ def add_input_source(self, download_url="", filename="", is_uploaded=False):
983984
if not download_url and not filename:
984985
raise Exception("Provide at least a value for download_url or filename.")
985986

987+
# A tag can be provided using the "#<fragment>" part of the URL
988+
tag = ""
989+
if download_url:
990+
parsed_url = urlparse(download_url)
991+
tag = parsed_url.fragment
992+
986993
return InputSource.objects.create(
987994
project=self,
988995
download_url=download_url,
989996
filename=filename,
990997
is_uploaded=is_uploaded,
998+
tag=tag,
991999
)
9921000

9931001
def add_downloads(self, downloads):

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ class DeployToDevelop(Pipeline):
3737
prefixes as inputs:
3838
- "from-[FILENAME]" archive containing the development source code
3939
- "to-[FILENAME]" archive containing the deployment compiled code
40+
41+
Alternatively, when using download URLs as inputs, the "from" and "to" tags can be
42+
provided by adding a "#from" / "#to" fragment at the end of the download URLs.
4043
"""
4144

4245
@classmethod

scanpipe/pipes/d2d.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,16 @@
5858

5959

6060
def get_inputs(project):
61-
"""Locate the ``from`` and ``to`` input files in project inputs/ directory."""
61+
"""
62+
Locate the ``from`` and ``to`` input files in project inputs/ directory.
63+
The input source can be flagged using a "from-" / "to-" prefix in the filename or
64+
by adding a "#from" / "#to" fragment at the end of the download URL.
65+
"""
6266
from_files = list(project.inputs("from*"))
67+
from_files.extend([input.path for input in project.inputsources.filter(tag="from")])
68+
6369
to_files = list(project.inputs("to*"))
70+
to_files.extend([input.path for input in project.inputsources.filter(tag="to")])
6471

6572
if len(from_files) < 1:
6673
raise FileNotFoundError("from* input files not found.")

scanpipe/tests/pipes/test_d2d.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,25 @@ def test_scanpipe_pipes_d2d_get_inputs(self):
7575
self.assertEqual(2, len(from_files))
7676
self.assertEqual(2, len(to_files))
7777

78+
_, input_location = tempfile.mkstemp(prefix="")
79+
self.project1.copy_input_from(input_location)
80+
url_with_fragment = "https://download.url#from"
81+
input_source1 = self.project1.add_input_source(
82+
download_url=url_with_fragment, filename=Path(input_location).name
83+
)
84+
_, input_location = tempfile.mkstemp(prefix="")
85+
self.project1.copy_input_from(input_location)
86+
url_with_fragment = "https://download.url#to"
87+
input_source2 = self.project1.add_input_source(
88+
download_url=url_with_fragment, filename=Path(input_location).name
89+
)
90+
91+
from_files, to_files = d2d.get_inputs(self.project1)
92+
self.assertEqual(3, len(from_files))
93+
self.assertEqual(3, len(to_files))
94+
self.assertIn(input_source1.path, from_files)
95+
self.assertIn(input_source2.path, to_files)
96+
7897
def test_scanpipe_pipes_d2d_get_extracted_path(self):
7998
path = "not/an/extracted/path/"
8099
r1 = make_resource_file(self.project1, path)

scanpipe/tests/test_api.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,41 @@ def test_scanpipe_api_project_create_base(self, mock_execute_pipeline_task):
355355
]
356356
self.assertEqual(expected, response.data["input_sources"])
357357

358+
def test_scanpipe_api_project_create_input_urls(self):
359+
url1 = "https://example.com/1.zip#from"
360+
url2 = "https://example.com/2.zip#to"
361+
data = {
362+
"name": "Inputs as list",
363+
"input_urls": [url1, url2],
364+
}
365+
response = self.csrf_client.post(self.project_list_url, data)
366+
self.assertEqual(status.HTTP_201_CREATED, response.status_code)
367+
self.assertEqual(2, len(response.data["input_sources"]))
368+
369+
data = {
370+
"name": "Inputs as string",
371+
"input_urls": f"{url1} {url2}",
372+
}
373+
response = self.csrf_client.post(self.project_list_url, data)
374+
self.assertEqual(status.HTTP_201_CREATED, response.status_code)
375+
self.assertEqual(2, len(response.data["input_sources"]))
376+
377+
data = {
378+
"name": "Inputs as list of string",
379+
"input_urls": [f"{url1} {url2}"],
380+
}
381+
response = self.csrf_client.post(self.project_list_url, data)
382+
self.assertEqual(status.HTTP_201_CREATED, response.status_code)
383+
self.assertEqual(2, len(response.data["input_sources"]))
384+
385+
data = {
386+
"name": "Inputs as mixed content",
387+
"input_urls": [f"{url1} {url2}", "https://example.com/3.zip"],
388+
}
389+
response = self.csrf_client.post(self.project_list_url, data)
390+
self.assertEqual(status.HTTP_201_CREATED, response.status_code)
391+
self.assertEqual(3, len(response.data["input_sources"]))
392+
358393
def test_scanpipe_api_project_create_multiple_pipelines(self):
359394
data = {
360395
"name": "Single string",

scanpipe/tests/test_models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,10 @@ def test_scanpipe_project_model_add_input_source(self):
464464
input_sources = self.project1.inputsources.all()
465465
self.assertEqual(2, len(input_sources))
466466

467+
url_with_fragment = "https://download.url#tag_value"
468+
input_source = self.project1.add_input_source(download_url=url_with_fragment)
469+
self.assertEqual("tag_value", input_source.tag)
470+
467471
def test_scanpipe_project_model_add_downloads(self):
468472
file_location = self.data_location / "notice.NOTICE"
469473
copy_input(file_location, self.project1.tmp_path)

0 commit comments

Comments
 (0)