Skip to content

Commit 98f0f1e

Browse files
authored
Add support for fetching git repo as Project input #921 (#1254)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent ea2c8ce commit 98f0f1e

File tree

11 files changed

+133
-11
lines changed

11 files changed

+133
-11
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ v34.6.2 (unreleased)
88
pipeline.
99
https://github.com/nexB/scancode.io/issues/1253
1010

11+
- Add support for fetching Git repository as Project input.
12+
https://github.com/nexB/scancode.io/issues/921
13+
1114
v34.6.1 (2024-06-07)
1215
--------------------
1316

docs/command-line-interface.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,13 @@ You can also provide URLs of files to be downloaded to the foo project
188188
$ [...] --input-url docker://postgres:13
189189
$ [...] --input-url docker://docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2
190190

191-
See https://docs.docker.com/engine/reference/builder/ for more details about
192-
references.
191+
See https://docs.docker.com/engine/reference/builder/ for more details about
192+
references.
193+
194+
.. note:: Git repositories are supported as input using their Git clone URL in the
195+
``https://<host>[:<port>]/<path-to-git-repo>.git`` syntax. For example::
196+
197+
$ [...] --input-url https://github.com/nexB/scancode.io.git
193198

194199

195200
`$ scanpipe add-pipeline --project PROJECT PIPELINE_NAME [PIPELINE_NAME ...]`

docs/faq.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,19 @@ Example for GitHub private repository files::
233233
Example for Docker private repository::
234234

235235
SCANCODEIO_SKOPEO_CREDENTIALS="registry.com=user:password"
236+
237+
Can I use a git repository as project input?
238+
--------------------------------------------
239+
240+
Yes, as an alternative to an uploaded file, or a download URL targeting an archive,
241+
you may directly provide the URL to a git repository.
242+
The repository will be cloned in the project inputs, fetching only the latest commit
243+
history, at the start of a pipeline execution.
244+
245+
Note that only the HTTPS type of URL is supported::
246+
247+
https://<host>[:<port>]/<path-to-git-repo>.git
248+
249+
A GitHub repository URL example::
250+
251+
https://github.com/username/repository.git

scanpipe/models.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,7 @@ def get_inputs_with_source(self):
939939
"tag": input_source.tag,
940940
# Properties
941941
"size": input_source.size,
942-
"is_file": True,
942+
"is_file": input_source.is_file(),
943943
# Methods
944944
"exists": input_source.exists(),
945945
}
@@ -1627,9 +1627,26 @@ def exists(self):
16271627
return False
16281628

16291629
def delete_file(self):
    """
    Delete the file or directory on disk.

    Nothing happens when ``self.path`` is falsy. A real directory, such as a
    git clone, is removed recursively. Anything else (regular file or
    symbolic link) is unlinked, and a missing path is silently ignored.
    """
    if path := self.path:
        # ``Path.is_dir`` follows symbolic links whereas ``shutil.rmtree``
        # refuses to operate on a link to a directory: unlink links instead
        # of raising, and never touch the link target.
        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            path.unlink(missing_ok=True)
1636+
1637+
def is_file(self):
    """
    Tell whether the path of this InputSource points to a file.

    An InputSource may be a directory on disk, for example the result of a
    git clone, in which case False is returned.

    When nothing exists on disk yet, True is returned. This keeps the
    behavior from the time when downloaded InputSources were always files.
    """
    if not self.exists():
        # Not on disk: keep reporting a file for backward compatibility.
        return True
    return self.path.is_file()
16331650

16341651
@property
16351652
def size(self):

scanpipe/pipelines/scan_codebase.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def copy_inputs_to_codebase_directory(self):
5252
Copy input files to the project's codebase/ directory.
5353
The code can also be copied there prior to running the Pipeline.
5454
"""
55-
copy_inputs(self.project.inputs(), self.project.codebase_path)
55+
copy_inputs(self.project.inputs("*"), self.project.codebase_path)
5656

5757
def collect_and_create_codebase_resources(self):
5858
"""Collect and create codebase resources."""

scanpipe/pipelines/scan_single_package.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def steps(cls):
7070
def get_package_input(self):
7171
"""Locate the package input in the project's input/ directory."""
7272
input_files = self.project.input_files
73-
inputs = list(self.project.inputs())
73+
inputs = list(self.project.inputs("*"))
7474

7575
if len(inputs) != 1 or len(input_files) != 1:
7676
raise Exception("Only 1 input file supported")

scanpipe/pipes/fetch.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
from django.conf import settings
3636

37+
import git
3738
import requests
3839
from commoncode import command
3940
from commoncode.hash import multi_checksums
@@ -312,6 +313,28 @@ def fetch_docker_image(docker_url, to=None):
312313
)
313314

314315

316+
def fetch_git_repo(url, to=None):
    """
    Clone the git repository at ``url`` and return a ``Download`` object.

    The clone is created in the ``to`` directory when provided, or in a new
    temporary directory otherwise, under a sub-directory named after the last
    segment of the ``url`` (trailing slashes are ignored).
    The ``size``, ``sha1``, and ``md5`` values of the returned ``Download`` are
    empty strings.
    """
    clone_url = url.rstrip("/")
    repo_name = clone_url.rsplit("/", 1)[-1]
    target_directory = to if to else tempfile.mkdtemp()
    clone_path = Path(target_directory) / repo_name

    # ``depth=1`` limits the clone to the latest commit.
    # ``GIT_TERMINAL_PROMPT=0`` disables any prompt, especially for credentials.
    git.Repo.clone_from(
        url=clone_url,
        to_path=clone_path,
        depth=1,
        env={"GIT_TERMINAL_PROMPT": "0"},
    )

    return Download(
        uri=clone_url,
        directory=target_directory,
        filename=repo_name,
        path=clone_path,
        size="",
        sha1="",
        md5="",
    )
336+
337+
315338
SCHEME_TO_FETCHER_MAPPING = {
316339
"http": fetch_http,
317340
"https": fetch_http,
@@ -321,6 +344,12 @@ def fetch_docker_image(docker_url, to=None):
321344

322345
def get_fetcher(url):
323346
"""Return the fetcher function based on the provided `url` scheme."""
347+
if url.startswith("git@"):
348+
raise ValueError("SSH 'git@' URLs are not supported. Use https:// instead.")
349+
350+
if url.rstrip("/").endswith(".git"):
351+
return fetch_git_repo
352+
324353
# Not using `urlparse(url).scheme` for the scheme as it converts to lower case.
325354
scheme = url.split("://")[0]
326355

scanpipe/pipes/input.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,16 @@
4040

4141

4242
def copy_input(input_location, dest_path):
    """
    Copy the ``input_location`` (file or directory) to the ``dest_path``.

    The copy keeps the name of the input and is placed directly under
    ``dest_path``. A directory input, such as a git clone, is copied
    recursively. An existing destination is overwritten in both cases.

    Return the destination path of the copy.
    """
    input_path = Path(input_location)
    destination = Path(dest_path) / input_path.name

    if input_path.is_dir():
        # ``copyfile`` silently replaces an existing destination; allow the
        # same for directories instead of raising ``FileExistsError`` when the
        # input is copied a second time.
        shutil.copytree(input_path, destination, dirs_exist_ok=True)
    else:
        shutil.copyfile(input_path, destination)

    return destination
4653

4754

4855
def copy_inputs(input_locations, dest_path):

scanpipe/templates/scanpipe/panels/project_inputs.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
{% if input_source.is_file and input_source.size %}
3737
<span class="mr-1">{{ input_source.size|filesizeformat }}</span>
3838
{% endif %}
39-
{% if input_source.exists and input_source.filename %}
39+
{% if input_source.is_file and input_source.exists and input_source.filename %}
4040
<a class="is-grey-link is-clickable" href="{% url 'project_download_input' project.slug input_source.filename %}"><span class="icon width-1 height-1"><i class="fa-solid fa-download"></i></span></a>
4141
{% endif %}
4242
{% if project.can_change_inputs and input_source.uuid %}

scanpipe/tests/pipes/test_fetch.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
3838
self.assertEqual(fetch.fetch_http, fetch.get_fetcher("http://a.b/f.z"))
3939
self.assertEqual(fetch.fetch_http, fetch.get_fetcher("https://a.b/f.z"))
4040
self.assertEqual(fetch.fetch_docker_image, fetch.get_fetcher("docker://image"))
41+
git_http_url = "https://github.com/nexB/scancode.io.git"
42+
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url))
43+
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url + "/"))
4144

4245
with self.assertRaises(ValueError) as cm:
4346
fetch.get_fetcher("")
@@ -59,6 +62,11 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
5962
expected = "URL scheme 'DOCKER' is not supported. Did you mean: 'docker'?"
6063
self.assertEqual(expected, str(cm.exception))
6164

65+
with self.assertRaises(ValueError) as cm:
66+
fetch.get_fetcher("git@github.com:nexB/scancode.io.git")
67+
expected = "SSH 'git@' URLs are not supported. Use https:// instead."
68+
self.assertEqual(expected, str(cm.exception))
69+
6270
@mock.patch("requests.sessions.Session.get")
6371
def test_scanpipe_pipes_fetch_http(self, mock_get):
6472
url = "https://example.com/filename.zip"
@@ -217,3 +225,16 @@ def test_scanpipe_pipes_fetch_get_request_session(self):
217225
with override_settings(SCANCODEIO_FETCH_HEADERS=headers):
218226
session = fetch.get_request_session(url)
219227
self.assertEqual("token TOKEN", session.headers.get("Authorization"))
228+
229+
@mock.patch("git.repo.base.Repo.clone_from")
def test_scanpipe_pipes_fetch_git_repo(self, mock_clone_from):
    """Check the ``Download`` returned for a git URL, with the clone mocked."""
    import shutil

    mock_clone_from.return_value = None
    url = "https://github.com/nexB/scancode.io.git"
    download = fetch.fetch_git_repo(url)
    # No ``to`` directory is provided, so a temporary one is created by the
    # fetcher: remove it once the test is done.
    self.addCleanup(shutil.rmtree, download.directory, ignore_errors=True)

    self.assertEqual(url, download.uri)
    self.assertEqual("scancode.io.git", download.filename)
    self.assertTrue(str(download.path).endswith("scancode.io.git"))
    self.assertEqual("", download.size)
    self.assertEqual("", download.sha1)
    self.assertEqual("", download.md5)

    # The clone must be shallow and must never prompt for credentials.
    mock_clone_from.assert_called_once_with(
        url=url,
        to_path=download.path,
        depth=1,
        env={"GIT_TERMINAL_PROMPT": "0"},
    )

0 commit comments

Comments
 (0)