diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py index 2d7d247c6..da13f08fe 100644 --- a/piptools/repositories/pypi.py +++ b/piptools/repositories/pypi.py @@ -11,6 +11,7 @@ BinaryIO, ContextManager, Dict, + Iterable, Iterator, List, NamedTuple, @@ -28,7 +29,7 @@ from pip._internal.models.index import PackageIndex from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel -from pip._internal.network.session import PipSession +from pip._internal.network.session import PipSession, Response from pip._internal.req import InstallRequirement, RequirementSet from pip._internal.utils.hashes import FAVORITE_HASH from pip._internal.utils.logging import indent_log, setup_logging @@ -246,7 +247,55 @@ def get_dependencies(self, ireq: InstallRequirement) -> Set[InstallRequirement]: return self._dependencies_cache[ireq] - def _get_project(self, ireq: InstallRequirement) -> Any: + def _get_json_from_index(self, link: Link) -> Optional[Dict[str, Any]]: + url = f"{link.url}/json" + try: + response = self.session.get(url) + except RequestException as e: + log.debug(f"Fetch package info from PyPI failed: {url}: {e}") + return None + + # Skip this PyPI server, because there is no package + # or JSON API might be not supported + if response.status_code == 404: + return None + + try: + return self._get_json_obj(response) + except ValueError as e: + log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}") + return None + + @staticmethod + def _get_json_obj(resp: Response) -> Optional[Dict[str, Any]]: + decoded_json = resp.json() + + if decoded_json is None: + return decoded_json + + # This is actually guaranteed by the python stdlib. Json can only contain these types + if not isinstance(decoded_json, dict): + raise ValueError( + f"Invalid json type {type(decoded_json)}. Expected 'object'" + ) + + return decoded_json + + def _get_all_package_links(self, ireq: InstallRequirement) -> Iterable[Link]: + package_indexes = ( + PackageIndex(url=index_url, file_storage_domain="") + for index_url in self.finder.search_scope.index_urls + ) + package_links = ( + Link( + f"{package_index.pypi_url}/{ireq.name}", + comes_from=package_index.simple_url, + ) + for package_index in package_indexes + ) + return package_links + + def _get_project(self, ireq: InstallRequirement) -> Optional[Dict[str, Any]]: """ Return a dict of a project info from PyPI JSON API for a given InstallRequirement. Return None on HTTP/JSON error or if a package @@ -254,29 +303,10 @@ def _get_project(self, ireq: InstallRequirement) -> Any: API reference: https://warehouse.readthedocs.io/api-reference/json/ """ - package_indexes = ( - PackageIndex(url=index_url, file_storage_domain="") - for index_url in self.finder.search_scope.index_urls - ) - for package_index in package_indexes: - url = f"{package_index.pypi_url}/{ireq.name}/json" - try: - response = self.session.get(url) - except RequestException as e: - log.debug(f"Fetch package info from PyPI failed: {url}: {e}") - continue - - # Skip this PyPI server, because there is no package - # or JSON API might be not supported - if response.status_code == 404: - continue - - try: - data = response.json() - except ValueError as e: - log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}") - continue - return data + for link in self._get_all_package_links(ireq): + data = self._get_json_from_index(link) + if data is not None: + return data return None def _get_download_path(self, ireq: InstallRequirement) -> str: @@ -327,26 +357,49 @@ def get_hashes(self, ireq: InstallRequirement) -> Set[str]: log.debug(ireq.name) with log.indentation(): - hashes = self._get_hashes_from_pypi(ireq) - if hashes is None: - log.debug("Couldn't get hashes from PyPI, fallback to hashing files") - return self._get_hashes_from_files(ireq) + # get pypi hashes using the json API, once per server + pypi_hashes = self._get_hashes_from_pypi(ireq) + # Compute hashes for ALL candidate installation links, using hashes from json APIs + hashes = self._get_hashes_from_files(ireq, pypi_hashes) return hashes - def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> Optional[Set[str]]: + def _get_hashes_from_pypi( + self, ireq: InstallRequirement + ) -> Optional[Dict[str, Set[str]]]: + # package links help us construct a json query URL for index servers. + # Note the same type but different usage from installation candidates because we only care + # about one per server. + all_package_links = self._get_all_package_links(ireq) + + # Get json from each index server + pypi_json = { + link.comes_from: self._get_json_from_index(link) + for link in all_package_links + } + pypi_hashes = { + url: self._get_hash_from_json(json_resp, ireq) + for url, json_resp in pypi_json.items() + if json_resp + } + + # remove duplicates and empty json responses + hashes_by_index = { + url: hashes for url, hashes in pypi_hashes.items() if hashes is not None + } + return hashes_by_index or None + + def _get_hash_from_json( + self, pypi_json: Dict[str, Any], ireq: InstallRequirement + ) -> Optional[Set[str]]: """ Return a set of hashes from PyPI JSON API for a given InstallRequirement. Return None if fetching data is failed or missing digests. """ - project = self._get_project(ireq) - if project is None: - return None - _, version, _ = as_tuple(ireq) try: - release_files = project["releases"][version] + release_files = pypi_json["releases"][version] except KeyError: log.debug("Missing release files on PyPI") return None @@ -363,7 +416,11 @@ def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> Optional[Set[str]]: return hashes - def _get_hashes_from_files(self, ireq: InstallRequirement) -> Set[str]: + def _get_hashes_from_files( + self, + ireq: InstallRequirement, + pypi_hashes: Optional[Dict[str, Set[str]]] = None, + ) -> Set[str]: """ Return a set of hashes for all release files of a given InstallRequirement. """ @@ -377,11 +434,29 @@ def _get_hashes_from_files(self, ireq: InstallRequirement) -> Set[str]: ) matching_candidates = candidates_by_version[matching_versions[0]] - return { - self._get_file_hash(candidate.link) for candidate in matching_candidates - } + return set( + itertools.chain.from_iterable( + self._get_hashes_for_link(candidate.link, pypi_hashes) + for candidate in matching_candidates + ) + ) + + def _get_hashes_for_link( + self, link: Link, pypi_hashes: Optional[Dict[str, Set[str]]] + ) -> Iterator[str]: + # This conditional feels too peculiar to tolerate future maintenance. + # But figuring out how the Link is constructed in find_all_candidates smells like + # it isn't a guaranteed API + if pypi_hashes and link.comes_from in pypi_hashes: + yield from pypi_hashes[link.comes_from] + + else: + yield self._get_file_hash(link) def _get_file_hash(self, link: Link) -> str: + if link.has_hash: + return f"{link.hash_name}:{link.hash}" + log.debug(f"Hashing {link.show_url}") h = hashlib.new(FAVORITE_HASH) with open_local_or_remote_file(link, self.session) as f: diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py index 3e9e4592d..072df7dd6 100644 --- a/tests/test_repository_pypi.py +++ b/tests/test_repository_pypi.py @@ -1,13 +1,17 @@ import os +from pathlib import Path +from typing import Any, Iterable, List from unittest import mock import pytest +from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.utils.urls import path_to_url from pip._vendor.requests import HTTPError, Session from piptools.repositories import PyPIRepository from piptools.repositories.pypi import open_local_or_remote_file +from tests.constants import MINIMAL_WHEELS_PATH def test_generate_hashes_all_platforms(capsys, pip_conf, from_line, pypi_repository): @@ -179,7 +183,7 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {"sha256:fake-hash"}, + {"https://pypi.org/simple": {"sha256:fake-hash"}}, id="return single hash", ), pytest.param( @@ -197,7 +201,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + { + "https://pypi.org/simple": { + "sha256:fake-hash-number1", + "sha256:fake-hash-number2", + } + }, id="return multiple hashes", ), pytest.param( @@ -219,7 +228,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + { + "https://pypi.org/simple": { + "sha256:fake-hash-number1", + "sha256:fake-hash-number2", + } + }, id="return only bdist_wheel and sdist hashes", ), pytest.param(None, None, id="not found project data"), @@ -250,7 +264,7 @@ def test_get_hashes_from_pypi(from_line, tmpdir, project_data, expected_hashes): """ class MockPyPIRepository(PyPIRepository): - def _get_project(self, ireq): + def _get_json_from_index(self, ireq): return project_data pypi_repository = MockPyPIRepository( @@ -262,6 +276,172 @@ def _get_project(self, ireq): assert actual_hashes == expected_hashes +@pytest.mark.parametrize( + ("pypi_json", "expected_hashes"), + ( + pytest.param( + { + "https://pypi.org/pypi/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "bdist_wheel", + "digests": {"sha256": "fake-hash"}, + } + ] + }, + "comes_from": "https://pypi.org/simple", + }, + }, + {"sha256:fake-hash"}, + id="return single hash", + ), + pytest.param( + { + "https://pypi.org/pypi/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "bdist_wheel", + "digests": {"sha256": "fake-hash-number1"}, + }, + { + "packagetype": "sdist", + "digests": {"sha256": "fake-hash-number2"}, + }, + ] + }, + "comes_from": "https://pypi.org/simple", + } + }, + {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + id="return multiple hashes", + ), + pytest.param( + { + "https://pypi.org/simple/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "bdist_wheel", + "digests": {"sha256": "fake-hash-number1"}, + }, + ] + }, + "comes_from": "https://pypi.org/simple", + }, + "https://internal-index-server.com/pypi/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "sdist", + "digests": {"sha256": "fake-hash-number2"}, + }, + ] + }, + "comes_from": "https://internal-index-server.com/simple", + }, + }, + {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + id="return multiple indices", + ), + ), +) +def test_get_hashes_multiple_indices_json_api( + from_line, tmpdir, pypi_json, expected_hashes +): + """ + Test PyPIRepository.get_hashes() returns expected hashes for multiple indices. + """ + + # I don't love how specific the details of this test are. If PyPIRepository changes internal + # implementation details, this mock class will need to change too. + class MockPyPIRepository(PyPIRepository): + def _get_all_package_links(self, _: Any) -> Iterable[Any]: + return [ + Link(url, comes_from=pypi_json[url]["comes_from"]) for url in pypi_json + ] + + def _get_json_from_index(self, link: Link) -> Any: + return pypi_json.get(link.url) + + def find_all_candidates(self, req_name: str) -> List[InstallationCandidate]: + # I don't know why, but candidates have a different URL the the json links + return [ + InstallationCandidate( + "fake-package", + "0.1", + Link(url, comes_from=pypi_json[url]["comes_from"]), + ) + for url in pypi_json + ] + + pypi_repository = MockPyPIRepository( + ["--no-cache-dir"], cache_dir=(tmpdir / "pypi-repo-cache") + ) + ireq = from_line("fake-package==0.1") + + actual_hashes = pypi_repository.get_hashes(ireq) + assert actual_hashes == expected_hashes + + +def test_get_hashes_pypi_and_simple_index_server(from_line, tmpdir): + """ + test PyPIRepository.get_hashes() when a file hash needs to be computed + """ + expected_hashes = { + "sha256:fake-hash-number1", + "sha256:5e6071ee6e4c59e0d0408d366fe9b66781d2cf01be9a6e19a2433bb3c5336330", + } + + simple_server_address = "https://internal-index-server.com/pypi/small-fake-a" + hashable_path = Path(MINIMAL_WHEELS_PATH) / "small_fake_a-0.1-py2.py3-none-any.whl" + + pypi_response = { + "releases": { + "0.1": [ + { + "packagetype": "sdist", + "digests": {"sha256": "fake-hash-number1"}, + }, + ] + }, + "comes_from": "https://pypi.org/simple", + } + installation_links = [ + Link( + "https://pypi.org/pypi/small-fake-a", comes_from=pypi_response["comes_from"] + ), + Link(f"file://{hashable_path.absolute()}", comes_from=simple_server_address), + ] + installation_candidates = [ + InstallationCandidate("small-fake-a", "0.1", link) + for link in installation_links + ] + + ireq = from_line("small-fake-a==0.1") + + pypi_repository = PyPIRepository( + [ + "--index-url", + PyPIRepository.DEFAULT_INDEX_URL, + "--extra-index-url", + simple_server_address, + ], + cache_dir=(tmpdir / "pypi-repo"), + ) + + with mock.patch.object( + pypi_repository, "_get_json_from_index", return_value=pypi_response + ): + with mock.patch.object( + pypi_repository, "find_all_candidates", return_value=installation_candidates + ): + actual_hashes = pypi_repository.get_hashes(ireq) + + assert actual_hashes == expected_hashes + + def test_get_project__returns_data(from_line, tmpdir, monkeypatch, pypi_repository): """ Test PyPIRepository._get_project() returns expected project data.