Skip to content

Commit 6e8aa2a

Browse files
authored
Add support for fetching authentications #620 (#1097)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent d4b299c commit 6e8aa2a

File tree

12 files changed

+168
-10
lines changed

12 files changed

+168
-10
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ v33.2.0 (unreleased)
4848
action providing a value for the new `output_format` parameter.
4949
https://github.com/nexB/scancode.io/issues/1091
5050

51+
- Add multiple settings related to fetching private files. These settings allow you to
52+
define credentials for various authentication types.
53+
https://github.com/nexB/scancode.io/issues/620
54+
5155
- Update matchcode-toolkit to v3.0.0
5256

5357
v33.1.0 (2024-02-02)

docs/application-settings.rst

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,3 +313,63 @@ However, if authentication is enabled on your VulnerableCode instance,
313313
you can provide the API key using ``VULNERABLECODE_API_KEY``::
314314

315315
VULNERABLECODE_API_KEY=insert_your_api_key_here
316+
317+
.. _scancodeio_settings_fetch_authentication:
318+
319+
Fetch Authentication
320+
--------------------
321+
322+
Several settings are available to define the credentials required to access your
323+
private files, depending on the authentication type: Basic, Digest, Token header, etc.
324+
325+
.. note::
326+
The provided credentials are enabled for all projects on the ScanCode.io instance.
327+
328+
.. warning::
329+
Ensure that the provided ``host`` values are fully qualified, including the domain
330+
and subdomain.
331+
332+
.. _scancodeio_settings_fetch_basic_auth:
333+
334+
SCANCODEIO_FETCH_BASIC_AUTH
335+
^^^^^^^^^^^^^^^^^^^^^^^^^^^
336+
337+
You can provide credentials for input URLs protected by Basic Authentication using
338+
the ``host=user,password`` syntax::
339+
340+
SCANCODEIO_FETCH_BASIC_AUTH="www.host1.com=user,password;www.host2.com=user,password;"
341+
342+
.. _scancodeio_settings_fetch_digest_auth:
343+
344+
SCANCODEIO_FETCH_DIGEST_AUTH
345+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
346+
347+
You can provide credentials for input URLs protected by Digest Authentication using
348+
the ``host=user,password`` syntax::
349+
350+
SCANCODEIO_FETCH_DIGEST_AUTH="www.host1.com=user,password;www.host2.com=user,password;"
351+
352+
.. _scancodeio_settings_fetch_headers:
353+
354+
SCANCODEIO_FETCH_HEADERS
355+
^^^^^^^^^^^^^^^^^^^^^^^^
356+
357+
When authentication credentials can be provided through HTTP request headers, you can
358+
use the following syntax::
359+
360+
SCANCODEIO_FETCH_HEADERS="www.host1.com=Header1=value,Header2=value;"
361+
362+
Example for a GitHub private repository::
363+
364+
SCANCODEIO_FETCH_HEADERS="raw.github.com=Authorization=token <YOUR_TOKEN>"
365+
366+
.. _scancodeio_settings_netrc_location:
367+
368+
SCANCODEIO_NETRC_LOCATION
369+
^^^^^^^^^^^^^^^^^^^^^^^^^
370+
371+
If your credentials are stored in a
372+
`.netrc <https://everything.curl.dev/usingcurl/netrc>`_ file, you can provide its
373+
location on disk using::
374+
375+
SCANCODEIO_NETRC_LOCATION="~/.netrc"

docs/faq.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,19 @@ There are multiple ways to tag input files when uploading local files:
197197

198198
- **Command Line Interface:** Tag uploaded files using the "filename:tag" syntax.
199199
Example: ``--input-file path/filename:tag``.
200+
201+
How to fetch files from private sources protected by credentials?
202+
---------------------------------------------------------------------
203+
204+
Several :ref:`scancodeio_settings_fetch_authentication` settings are available to
205+
define the credentials required to access your private files, depending on the
206+
authentication type:
207+
208+
- :ref:`Basic authentication <scancodeio_settings_fetch_basic_auth>`
209+
- :ref:`Digest authentication <scancodeio_settings_fetch_digest_auth>`
210+
- :ref:`HTTP request headers <scancodeio_settings_fetch_headers>`
211+
- :ref:`.netrc file <scancodeio_settings_netrc_location>`
212+
213+
Example for GitHub private repository files::
214+
215+
SCANCODEIO_FETCH_HEADERS="raw.github.com=Authorization=token <YOUR_TOKEN>"

scancodeio/settings.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,36 @@
115115
# Default limit for "most common" entries in QuerySets.
116116
SCANCODEIO_MOST_COMMON_LIMIT = env.int("SCANCODEIO_MOST_COMMON_LIMIT", default=7)
117117

118+
# Fetch authentication credentials
119+
120+
# SCANCODEIO_FETCH_BASIC_AUTH="host=user,password;"
121+
SCANCODEIO_FETCH_BASIC_AUTH = env.dict(
122+
"SCANCODEIO_FETCH_BASIC_AUTH",
123+
cast={"value": tuple},
124+
default={},
125+
)
126+
127+
# SCANCODEIO_FETCH_DIGEST_AUTH="host=user,password;"
128+
SCANCODEIO_FETCH_DIGEST_AUTH = env.dict(
129+
"SCANCODEIO_FETCH_DIGEST_AUTH",
130+
cast={"value": tuple},
131+
default={},
132+
)
133+
134+
# SCANCODEIO_FETCH_HEADERS="host=Header1=value,Header2=value;"
# Maps each host to the dict of HTTP headers to send when fetching from it.
SCANCODEIO_FETCH_HEADERS = {}
FETCH_HEADERS_STR = env.str("SCANCODEIO_FETCH_HEADERS", default="")
for entry in FETCH_HEADERS_STR.split(";"):
    entry = entry.strip()
    if not entry:
        # Empty segments come from a trailing ";" or an unset variable.
        continue
    # Split on the first "=" only: the remainder is itself a "Header=value" list.
    host, headers = entry.split("=", 1)
    # Strip the host so that entries written as "a=...; b=..." (whitespace after
    # the ";" separator) are still keyed by the bare host name.
    SCANCODEIO_FETCH_HEADERS[host.strip()] = env.parse_value(headers, cast=dict)
141+
142+
# SCANCODEIO_NETRC_LOCATION="~/.netrc"
143+
SCANCODEIO_NETRC_LOCATION = env.str("SCANCODEIO_NETRC_LOCATION", default="")
144+
if SCANCODEIO_NETRC_LOCATION:
145+
# Propagate the location to the environ for `requests.utils.get_netrc_auth`
146+
env.ENVIRON["NETRC"] = SCANCODEIO_NETRC_LOCATION
147+
118148
# Application definition
119149

120150
INSTALLED_APPS = [

scanpipe/api/views.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ def reset(self, request, *args, **kwargs):
343343
project = self.get_object()
344344

345345
if self.request.method == "GET":
346-
message = "POST on this URL to reset the project. " ""
346+
message = "POST on this URL to reset the project."
347347
return Response({"status": message})
348348

349349
try:

scanpipe/pipes/fetch.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,14 @@
3232
from urllib.parse import unquote
3333
from urllib.parse import urlparse
3434

35+
from django.conf import settings
36+
3537
import requests
3638
from commoncode import command
3739
from commoncode.hash import multi_checksums
3840
from commoncode.text import python_safe_name
3941
from plugincode.location_provider import get_location
42+
from requests import auth as request_auth
4043

4144
logger = logging.getLogger("scanpipe.pipes")
4245

@@ -70,12 +73,30 @@ def run_command_safely(command_args):
7073
return result.stdout
7174

7275

76+
def get_request_session(uri):
    """
    Return a Requests session for the provided `uri`, configured from the
    settings entries registered for the URI network location.

    Basic authentication credentials take precedence over Digest ones;
    extra headers are applied independently of the authentication type.
    """
    host = urlparse(uri).netloc
    session = requests.Session()

    basic_credentials = settings.SCANCODEIO_FETCH_BASIC_AUTH.get(host)
    if basic_credentials:
        session.auth = request_auth.HTTPBasicAuth(*basic_credentials)
    else:
        # Digest settings are only consulted when no Basic credentials apply.
        digest_credentials = settings.SCANCODEIO_FETCH_DIGEST_AUTH.get(host)
        if digest_credentials:
            session.auth = request_auth.HTTPDigestAuth(*digest_credentials)

    extra_headers = settings.SCANCODEIO_FETCH_HEADERS.get(host)
    if extra_headers:
        session.headers.update(extra_headers)

    return session
91+
92+
7393
def fetch_http(uri, to=None):
7494
"""
7595
Download a given `uri` in a temporary directory and return the directory's
7696
path.
7797
"""
78-
response = requests.get(uri, timeout=5)
98+
request_session = get_request_session(uri)
99+
response = request_session.get(uri, timeout=5)
79100

80101
if response.status_code != 200:
81102
raise requests.RequestException
@@ -329,8 +350,9 @@ def check_urls_availability(urls):
329350
if not url.startswith("http"):
330351
continue
331352

353+
request_session = get_request_session(url)
332354
try:
333-
response = requests.head(url, timeout=3)
355+
response = request_session.head(url, timeout=5)
334356
response.raise_for_status()
335357
except requests.exceptions.RequestException:
336358
errors.append(url)

scanpipe/tests/pipes/test_fetch.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
from unittest import mock
2525

2626
from django.test import TestCase
27+
from django.test import override_settings
28+
29+
from requests import auth as request_auth
2730

2831
from scanpipe.pipes import fetch
2932

@@ -56,7 +59,7 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
5659
expected = "URL scheme 'DOCKER' is not supported. Did you mean: 'docker'?"
5760
self.assertEqual(expected, str(cm.exception))
5861

59-
@mock.patch("requests.get")
62+
@mock.patch("requests.sessions.Session.get")
6063
def test_scanpipe_pipes_fetch_http(self, mock_get):
6164
url = "https://example.com/filename.zip"
6265

@@ -120,7 +123,7 @@ def test_scanpipe_pipes_fetch_docker_image_string_injection_protection(self):
120123
fetch.fetch_docker_image(url)
121124
self.assertEqual("Invalid Docker reference.", str(cm.exception))
122125

123-
@mock.patch("requests.get")
126+
@mock.patch("requests.sessions.Session.get")
124127
def test_scanpipe_pipes_fetch_fetch_urls(self, mock_get):
125128
urls = [
126129
"https://example.com/filename.zip",
@@ -141,3 +144,26 @@ def test_scanpipe_pipes_fetch_fetch_urls(self, mock_get):
141144
self.assertEqual(0, len(downloads))
142145
self.assertEqual(2, len(errors))
143146
self.assertEqual(urls, errors)
147+
148+
def test_scanpipe_pipes_fetch_get_request_session(self):
    """Check the session auth and headers for each fetch-related setting."""
    hostname = "example.com"
    download_url = "https://example.com/filename.zip"
    user_and_password = ("user", "pass")

    # Without any override, the session carries no auth handler.
    default_session = fetch.get_request_session(download_url)
    self.assertIsNone(default_session.auth)

    basic_setting = {hostname: user_and_password}
    with override_settings(SCANCODEIO_FETCH_BASIC_AUTH=basic_setting):
        basic_session = fetch.get_request_session(download_url)
        expected_basic = request_auth.HTTPBasicAuth(*user_and_password)
        self.assertEqual(expected_basic, basic_session.auth)

    digest_setting = {hostname: user_and_password}
    with override_settings(SCANCODEIO_FETCH_DIGEST_AUTH=digest_setting):
        digest_session = fetch.get_request_session(download_url)
        expected_digest = request_auth.HTTPDigestAuth(*user_and_password)
        self.assertEqual(expected_digest, digest_session.auth)

    headers_setting = {hostname: {"Authorization": "token TOKEN"}}
    with override_settings(SCANCODEIO_FETCH_HEADERS=headers_setting):
        headers_session = fetch.get_request_session(download_url)
        authorization = headers_session.headers.get("Authorization")
        self.assertEqual("token TOKEN", authorization)

scanpipe/tests/test_commands.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def test_scanpipe_management_command_add_input_file(self):
199199
with self.assertRaisesMessage(CommandError, expected):
200200
call_command("add-input", *options, stdout=out)
201201

202-
@mock.patch("requests.get")
202+
@mock.patch("requests.sessions.Session.get")
203203
def test_scanpipe_management_command_add_input_url(self, mock_get):
204204
mock_get.side_effect = None
205205
mock_get.return_value = mock.Mock(

scanpipe/tests/test_forms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class ScanPipeFormsTest(TestCase):
3838
def setUp(self):
3939
self.project1 = Project.objects.create(name="Analysis")
4040

41-
@mock.patch("requests.head")
41+
@mock.patch("requests.sessions.Session.head")
4242
def test_scanpipe_forms_inputs_base_form_input_urls(self, mock_head):
4343
data = {
4444
"input_urls": "Docker://debian",

scanpipe/tests/test_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1207,7 +1207,7 @@ def test_scanpipe_input_source_model_delete_file(self):
12071207
input_source.delete_file()
12081208
self.assertFalse(input_source.exists())
12091209

1210-
@mock.patch("requests.get")
1210+
@mock.patch("requests.sessions.Session.get")
12111211
def test_scanpipe_input_source_model_fetch(self, mock_get):
12121212
download_url = "https://download.url/file.zip"
12131213
mock_get.return_value = mock.Mock(

0 commit comments

Comments
 (0)