From 58107e82e9c6cb824053e7c91456cc429d4c458e Mon Sep 17 00:00:00 2001 From: Marcel Bochtler Date: Wed, 18 Jun 2025 10:46:19 +0200 Subject: [PATCH 1/4] Fix incorrect netrc format assumption According to the netrc specification (see [1] and [2]), the `machine` part should not be a full URL, but only a host name. Before, using the correct netrc format with only a host name did not work for authentication purposes in Python Inspector. Fix this by using urllib.parse to find the matching host name. [1]: https://www.ibm.com/docs/en/aix/7.2.0?topic=formats-netrc-file-format-tcpip [2]: https://docs.python.org/3/library/netrc.html#netrc.netrc.hosts Resolves: #176. Signed-off-by: Marcel Bochtler --- src/python_inspector/utils.py | 9 ++++++--- tests/data/test-commented.netrc | 4 ++-- tests/data/test.netrc | 3 ++- tests/test_utils.py | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/python_inspector/utils.py b/src/python_inspector/utils.py index 1548ac0b..67dccdc0 100644 --- a/src/python_inspector/utils.py +++ b/src/python_inspector/utils.py @@ -16,18 +16,21 @@ from typing import NamedTuple from typing import Optional +from urllib.parse import urlparse + import aiohttp import requests def get_netrc_auth(url, netrc): """ - Return login and password if url is in netrc + Return login and password if the hostname is in netrc else return login and password as None """ + hostname = urlparse(url).hostname hosts = netrc.hosts - if url in hosts: - url_auth = hosts.get(url) + if hostname in hosts: + url_auth = hosts.get(hostname) # netrc returns a tuple of (login, account, password) return (url_auth[0], url_auth[2]) return (None, None) diff --git a/tests/data/test-commented.netrc b/tests/data/test-commented.netrc index 858b48fb..2f482bb1 100644 --- a/tests/data/test-commented.netrc +++ b/tests/data/test-commented.netrc @@ -1,2 +1,2 @@ -machine https://pyp2.org/simple login test password test123 -# machine https://pyp1.org/simple 
login test password test123 \ No newline at end of file +machine pyp2.org login test password test123 +# machine pyp1.org login test password test123 \ No newline at end of file diff --git a/tests/data/test.netrc b/tests/data/test.netrc index d886b2dc..9b1b45cd 100644 --- a/tests/data/test.netrc +++ b/tests/data/test.netrc @@ -1 +1,2 @@ -machine https://pyp1.org/simple login test password test123 \ No newline at end of file +machine pyp1.org login test password test123 +machine subdomain.example.com login subdomain-user password subdomain-secret \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 17e7e897..250ecea7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -35,6 +35,25 @@ def test_get_netrc_auth(): netrc_file = test_env.get_test_loc("test.netrc") parsed_netrc = netrc(netrc_file) assert get_netrc_auth(url="https://pyp1.org/simple", netrc=parsed_netrc) == ("test", "test123") + assert get_netrc_auth(url="https://pyp1.org/different/path", netrc=parsed_netrc) == ( + "test", + "test123", + ) + assert get_netrc_auth(url="https://pyp1.org", netrc=parsed_netrc) == ("test", "test123") + + +def test_get_netrc_auth_with_ports_and_schemes(): + netrc_file = test_env.get_test_loc("test.netrc") + parsed_netrc = netrc(netrc_file) + + assert get_netrc_auth(url="https://pyp1.org:443/path", netrc=parsed_netrc) == ( + "test", + "test123", + ) + assert get_netrc_auth(url="http://pyp1.org:80/simple", netrc=parsed_netrc) == ( + "test", + "test123", + ) def test_get_commented_netrc_auth(): @@ -49,6 +68,20 @@ def test_get_netrc_auth_with_no_matching_url(): assert get_netrc_auth(url="https://pypi2.org/simple", netrc=parsed_netrc) == (None, None) +def test_get_netrc_auth_with_with_subdomains(): + netrc_file = test_env.get_test_loc("test.netrc") + parsed_netrc = netrc(netrc_file) + + assert get_netrc_auth(url="https://subdomain.example.com/simple", netrc=parsed_netrc) == ( + "subdomain-user", + "subdomain-secret", + ) + assert 
get_netrc_auth(url="https://another.example.com/simple", netrc=parsed_netrc) == ( + None, + None, + ) + + @pytest.mark.asyncio @pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher") @mock.patch("python_inspector.utils_pypi.CACHE.get") From a2808036065dea0ecdde8d7b8b1d731783c6b670 Mon Sep 17 00:00:00 2001 From: Marcel Bochtler Date: Wed, 18 Jun 2025 11:16:50 +0200 Subject: [PATCH 2/4] Support `default` host in netrc Support the fallback to `default` if the user did not set a specific host name in their netrc file. Signed-off-by: Marcel Bochtler --- src/python_inspector/utils.py | 7 ++++++- tests/data/test-default.netrc | 2 ++ tests/test_utils.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/data/test-default.netrc diff --git a/src/python_inspector/utils.py b/src/python_inspector/utils.py index 67dccdc0..14d2a98b 100644 --- a/src/python_inspector/utils.py +++ b/src/python_inspector/utils.py @@ -24,7 +24,7 @@ def get_netrc_auth(url, netrc): """ - Return login and password if the hostname is in netrc + Return login and password if either the hostname is in netrc or a default is set in netrc else return login and password as None """ hostname = urlparse(url).hostname @@ -33,6 +33,11 @@ def get_netrc_auth(url, netrc): url_auth = hosts.get(hostname) # netrc returns a tuple of (login, account, password) return (url_auth[0], url_auth[2]) + + if "default" in hosts: + default_auth = hosts.get("default") + return (default_auth[0], default_auth[2]) + return (None, None) diff --git a/tests/data/test-default.netrc b/tests/data/test-default.netrc new file mode 100644 index 00000000..c68aa0f0 --- /dev/null +++ b/tests/data/test-default.netrc @@ -0,0 +1,2 @@ +machine example.com login test password test123 +default login defaultuser password defaultpass diff --git a/tests/test_utils.py b/tests/test_utils.py index 250ecea7..d9c0fe5c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -82,6 +82,20 
@@ def test_get_netrc_auth_with_with_subdomains(): ) +def test_get_netrc_auth_with_default(): + netrc_file = test_env.get_test_loc("test-default.netrc") + parsed_netrc = netrc(netrc_file) + + assert get_netrc_auth(url="https://example.com/simple", netrc=parsed_netrc) == ( + "test", + "test123", + ) + assert get_netrc_auth(url="https://non-existing.org/simple", netrc=parsed_netrc) == ( + "defaultuser", + "defaultpass", + ) + + @pytest.mark.asyncio @pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher") @mock.patch("python_inspector.utils_pypi.CACHE.get") From f634e2dfa99e6a4f00725cb4a383d6d64dc65117 Mon Sep 17 00:00:00 2001 From: Marcel Bochtler Date: Wed, 18 Jun 2025 13:34:58 +0200 Subject: [PATCH 3/4] Fix incorrect credentials type for aiohttp Signed-off-by: Marcel Bochtler --- src/python_inspector/utils_pypi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/python_inspector/utils_pypi.py b/src/python_inspector/utils_pypi.py index f2757cd6..1cbbece9 100644 --- a/src/python_inspector/utils_pypi.py +++ b/src/python_inspector/utils_pypi.py @@ -1797,7 +1797,10 @@ async def get_remote_file_content( auth = None if credentials: - auth = (credentials.get("login"), credentials.get("password")) + login = credentials.get("login") + password = credentials.get("password") + if login and password: + auth = aiohttp.BasicAuth(login, password) async with aiohttp.ClientSession() as session: async with session.get(url, allow_redirects=True, headers=headers, auth=auth) as response: From e1b179e905dbea21f261d54bfc60baf1bf77ea45 Mon Sep 17 00:00:00 2001 From: Marcel Bochtler Date: Wed, 18 Jun 2025 14:38:28 +0200 Subject: [PATCH 4/4] Ensure `package_url`s end with a slash In PyPI simple repository format, package URLs typically end with the package name and should have a trailing slash [1]. To ensure this trailing slash, some web servers might redirect to the URL with the trailing slash.
This causes the issue that the BasicAuth credentials are removed from the request for security reasons. This was observed with an internal Artifactory repository and adding a trailing slash to the package_url fixes the issue. [1]: https://peps.python.org/pep-0503/#specification Resolves: #127. Signed-off-by: Marcel Bochtler --- src/python_inspector/utils_pypi.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/python_inspector/utils_pypi.py b/src/python_inspector/utils_pypi.py index 1cbbece9..0da998cc 100644 --- a/src/python_inspector/utils_pypi.py +++ b/src/python_inspector/utils_pypi.py @@ -1598,6 +1598,10 @@ async def fetch_links( name using the `index_url` of this repository. """ package_url = f"{self.index_url}/{normalized_name}" + + if not package_url.endswith("/"): + package_url += "/" + text, _ = await CACHE.get( path_or_url=package_url, credentials=self.credentials,