Skip to content

Commit b9641ae

Browse files
authored
Add a file-based cache for remote intersphinx inventories (#13684)
1 parent c384ab9 commit b9641ae

File tree

5 files changed: 110 additions (+110) and 29 deletions (-29)

CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,10 @@ Features added
5454
Patch by Adam Turner.
5555
* #13647: LaTeX: allow more cases of table nesting.
5656
Patch by Jean-François B.
57+
* #13684: intersphinx: Add a file-based cache for remote inventories.
58+
The location of the cache directory must not be relied upon externally,
59+
as it may change without notice or warning in future releases.
60+
Patch by Adam Turner.
5761

5862
Bugs fixed
5963
----------

sphinx/ext/intersphinx/_cli.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,11 @@
55
import sys
66
from pathlib import Path
77

8-
from sphinx.ext.intersphinx._load import _fetch_inventory, _InvConfig
8+
from sphinx.ext.intersphinx._load import (
9+
_fetch_inventory_data,
10+
_InvConfig,
11+
_load_inventory,
12+
)
913

1014

1115
def inspect_main(argv: list[str], /) -> int:
@@ -28,12 +32,14 @@ def inspect_main(argv: list[str], /) -> int:
2832
)
2933

3034
try:
31-
inv = _fetch_inventory(
35+
raw_data, _ = _fetch_inventory_data(
3236
target_uri='',
3337
inv_location=filename,
3438
config=config,
3539
srcdir=Path(),
40+
cache_path=None,
3641
)
42+
inv = _load_inventory(raw_data, target_uri='')
3743
for key in sorted(inv.data):
3844
print(key)
3945
inv_entries = sorted(inv.data[key].items())

sphinx/ext/intersphinx/_load.py

Lines changed: 48 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,9 @@ def load_mappings(app: Sphinx) -> None:
181181
now=now,
182182
config=inv_config,
183183
srcdir=app.srcdir,
184+
# the location of this cache directory must not be relied upon
185+
# externally, it may change without notice or warning.
186+
cache_dir=app.doctreedir / '__intersphinx_cache__',
184187
)
185188
for project in projects
186189
]
@@ -230,6 +233,7 @@ def _fetch_inventory_group(
230233
now: int,
231234
config: _InvConfig,
232235
srcdir: Path,
236+
cache_dir: Path | None,
233237
) -> bool:
234238
if config.intersphinx_cache_limit >= 0:
235239
# Positive value: cache is expired if its timestamp is below
@@ -250,6 +254,25 @@ def _fetch_inventory_group(
250254
else:
251255
inv_location = location
252256

257+
if cache_dir is not None:
258+
cache_path = cache_dir / f'{project.name}_{INVENTORY_FILENAME}'
259+
else:
260+
cache_path = None
261+
262+
if (
263+
cache_path is not None
264+
and '://' in inv_location
265+
and project.target_uri not in cache
266+
and cache_path.is_file()
267+
# the saved 'objects.inv' is not older than the cache expiry time
268+
and cache_path.stat().st_mtime >= cache_time
269+
):
270+
raw_data = cache_path.read_bytes()
271+
inv = _load_inventory(raw_data, target_uri=project.target_uri)
272+
cache_path_mtime = int(cache_path.stat().st_mtime)
273+
cache[project.target_uri] = project.name, cache_path_mtime, inv.data
274+
break
275+
253276
# decide whether the inventory must be read: always read local
254277
# files; remote ones only if the cache time is expired
255278
if (
@@ -264,17 +287,18 @@ def _fetch_inventory_group(
264287
)
265288

266289
try:
267-
inv = _fetch_inventory(
290+
raw_data, target_uri = _fetch_inventory_data(
268291
target_uri=project.target_uri,
269292
inv_location=inv_location,
270293
config=config,
271294
srcdir=srcdir,
295+
cache_path=cache_path,
272296
)
297+
inv = _load_inventory(raw_data, target_uri=target_uri)
273298
except Exception as err:
274299
failures.append(err.args)
275300
continue
276-
277-
if inv:
301+
else:
278302
cache[project.target_uri] = project.name, now, inv.data
279303
updated = True
280304
break
@@ -302,18 +326,25 @@ def _fetch_inventory_group(
302326

303327
def fetch_inventory(app: Sphinx, uri: InventoryURI, inv: str) -> Inventory:
304328
"""Fetch, parse and return an intersphinx inventory file."""
305-
return _fetch_inventory(
329+
raw_data, uri = _fetch_inventory_data(
306330
target_uri=uri,
307331
inv_location=inv,
308332
config=_InvConfig.from_config(app.config),
309333
srcdir=app.srcdir,
310-
).data
334+
cache_path=None,
335+
)
336+
return _load_inventory(raw_data, target_uri=uri).data
311337

312338

313-
def _fetch_inventory(
314-
*, target_uri: InventoryURI, inv_location: str, config: _InvConfig, srcdir: Path
315-
) -> _Inventory:
316-
"""Fetch, parse and return an intersphinx inventory file."""
339+
def _fetch_inventory_data(
340+
*,
341+
target_uri: InventoryURI,
342+
inv_location: str,
343+
config: _InvConfig,
344+
srcdir: Path,
345+
cache_path: Path | None,
346+
) -> tuple[bytes, str]:
347+
"""Fetch inventory data from a local or remote source."""
317348
# both *target_uri* (base URI of the links to generate)
318349
# and *inv_location* (actual location of the inventory file)
319350
# can be local or remote URIs
@@ -324,9 +355,17 @@ def _fetch_inventory(
324355
raw_data, target_uri = _fetch_inventory_url(
325356
target_uri=target_uri, inv_location=inv_location, config=config
326357
)
358+
if cache_path is not None:
359+
cache_path.parent.mkdir(parents=True, exist_ok=True)
360+
cache_path.write_bytes(raw_data)
327361
else:
328362
raw_data = _fetch_inventory_file(inv_location=inv_location, srcdir=srcdir)
363+
return raw_data, target_uri
364+
329365

366+
def _load_inventory(raw_data: bytes, /, *, target_uri: InventoryURI) -> _Inventory:
367+
"""Parse and return an intersphinx inventory file."""
368+
# *target_uri* (base URI of the links to generate) can be a local or remote URI
330369
try:
331370
inv = InventoryFile.loads(raw_data, uri=target_uri)
332371
except ValueError as exc:

tests/test_extensions/test_ext_intersphinx.py

Lines changed: 47 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,11 @@
1818
from sphinx.ext.intersphinx import setup as intersphinx_setup
1919
from sphinx.ext.intersphinx._cli import inspect_main
2020
from sphinx.ext.intersphinx._load import (
21-
_fetch_inventory,
21+
_fetch_inventory_data,
2222
_fetch_inventory_group,
2323
_get_safe_url,
2424
_InvConfig,
25+
_load_inventory,
2526
_strip_basic_auth,
2627
load_mappings,
2728
validate_intersphinx_mapping,
@@ -85,12 +86,15 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
8586

8687
# same uri and inv, not redirected
8788
mocked_get.url = 'https://hostname/' + INVENTORY_FILENAME
88-
_fetch_inventory(
89-
target_uri='https://hostname/',
89+
target_uri = 'https://hostname/'
90+
raw_data, target_uri = _fetch_inventory_data(
91+
target_uri=target_uri,
9092
inv_location='https://hostname/' + INVENTORY_FILENAME,
9193
config=_InvConfig.from_config(app.config),
9294
srcdir=app.srcdir,
95+
cache_path=None,
9396
)
97+
_load_inventory(raw_data, target_uri=target_uri)
9498
assert 'intersphinx inventory has moved' not in app.status.getvalue()
9599
assert InventoryFile.loads.call_args[1]['uri'] == 'https://hostname/'
96100

@@ -99,12 +103,15 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
99103
app.status.truncate(0)
100104
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
101105

102-
_fetch_inventory(
103-
target_uri='https://hostname/',
106+
target_uri = 'https://hostname/'
107+
raw_data, target_uri = _fetch_inventory_data(
108+
target_uri=target_uri,
104109
inv_location='https://hostname/' + INVENTORY_FILENAME,
105110
config=_InvConfig.from_config(app.config),
106111
srcdir=app.srcdir,
112+
cache_path=None,
107113
)
114+
_load_inventory(raw_data, target_uri=target_uri)
108115
assert app.status.getvalue() == (
109116
'intersphinx inventory has moved: '
110117
'https://hostname/%s -> https://hostname/new/%s\n'
@@ -117,12 +124,15 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
117124
app.status.truncate(0)
118125
mocked_get.url = 'https://hostname/new/' + INVENTORY_FILENAME
119126

120-
_fetch_inventory(
121-
target_uri='https://hostname/',
127+
target_uri = 'https://hostname/'
128+
raw_data, target_uri = _fetch_inventory_data(
129+
target_uri=target_uri,
122130
inv_location='https://hostname/new/' + INVENTORY_FILENAME,
123131
config=_InvConfig.from_config(app.config),
124132
srcdir=app.srcdir,
133+
cache_path=None,
125134
)
135+
_load_inventory(raw_data, target_uri=target_uri)
126136
assert 'intersphinx inventory has moved' not in app.status.getvalue()
127137
assert InventoryFile.loads.call_args[1]['uri'] == 'https://hostname/'
128138

@@ -131,12 +141,15 @@ def test_fetch_inventory_redirection(get_request, InventoryFile, app):
131141
app.status.truncate(0)
132142
mocked_get.url = 'https://hostname/other/' + INVENTORY_FILENAME
133143

134-
_fetch_inventory(
135-
target_uri='https://hostname/',
144+
target_uri = 'https://hostname/'
145+
raw_data, target_uri = _fetch_inventory_data(
146+
target_uri=target_uri,
136147
inv_location='https://hostname/new/' + INVENTORY_FILENAME,
137148
config=_InvConfig.from_config(app.config),
138149
srcdir=app.srcdir,
150+
cache_path=None,
139151
)
152+
_load_inventory(raw_data, target_uri=target_uri)
140153
assert app.status.getvalue() == (
141154
'intersphinx inventory has moved: '
142155
'https://hostname/new/%s -> https://hostname/other/%s\n'
@@ -774,13 +787,16 @@ def test_intersphinx_cache_limit(app, monkeypatch, cache_limit, expected_expired
774787
now = 2 * 86400
775788
monkeypatch.setattr('time.time', lambda: now)
776789

777-
# `_fetch_inventory_group` calls `_fetch_inventory`.
790+
# `_fetch_inventory_group` calls `_fetch_inventory_data`.
778791
# We replace it with a mock to test whether it has been called.
779792
# If it has been called, it means the cache had expired.
780-
mock_fake_inventory = _Inventory({}) # must be truthy
781-
mock_fetch_inventory = mock.Mock(return_value=mock_fake_inventory)
782793
monkeypatch.setattr(
783-
'sphinx.ext.intersphinx._load._fetch_inventory', mock_fetch_inventory
794+
'sphinx.ext.intersphinx._load._fetch_inventory_data',
795+
mock.Mock(return_value=(b'', '')),
796+
)
797+
mock_fetch_inventory = mock.Mock(return_value=_Inventory({}))
798+
monkeypatch.setattr(
799+
'sphinx.ext.intersphinx._load._load_inventory', mock_fetch_inventory
784800
)
785801

786802
for name, (uri, locations) in app.config.intersphinx_mapping.values():
@@ -791,8 +807,9 @@ def test_intersphinx_cache_limit(app, monkeypatch, cache_limit, expected_expired
791807
now=now,
792808
config=_InvConfig.from_config(app.config),
793809
srcdir=app.srcdir,
810+
cache_dir=None,
794811
)
795-
# If we hadn't mocked `_fetch_inventory`, it would've made
812+
# If we hadn't mocked `_fetch_inventory_data`, it would've made
796813
# a request to `https://example.org/` and found no inventory
797814
# file. That would've been an error, and `updated` would've been
798815
# False even if the cache had expired. The mock makes it behave
@@ -826,8 +843,14 @@ def log_message(*args, **kwargs):
826843
}
827844

828845
now = int(time.time())
829-
# we can use 'srcdir=None' since we are raising in _fetch_inventory
830-
kwds = {'cache': {}, 'now': now, 'config': config, 'srcdir': None}
846+
# we can use 'srcdir=None' since we are raising in _fetch_inventory_data
847+
kwds = {
848+
'cache': {},
849+
'now': now,
850+
'config': config,
851+
'srcdir': None,
852+
'cache_dir': None,
853+
}
831854
# We need an exception with its 'args' attribute set (see error
832855
# handling in sphinx.ext.intersphinx._load._fetch_inventory_group).
833856
side_effect = ValueError('')
@@ -836,38 +859,44 @@ def log_message(*args, **kwargs):
836859
name='1', target_uri=url1, locations=(url1, None)
837860
)
838861
with mock.patch(
839-
'sphinx.ext.intersphinx._load._fetch_inventory', side_effect=side_effect
862+
'sphinx.ext.intersphinx._load._fetch_inventory_data',
863+
side_effect=side_effect,
840864
) as mockfn:
841865
assert not _fetch_inventory_group(project=project1, **kwds)
842866
mockfn.assert_any_call(
843867
target_uri=url1,
844868
inv_location=url1,
845869
config=config,
846870
srcdir=None,
871+
cache_path=None,
847872
)
848873
mockfn.assert_any_call(
849874
target_uri=url1,
850875
inv_location=url1 + '/' + INVENTORY_FILENAME,
851876
config=config,
852877
srcdir=None,
878+
cache_path=None,
853879
)
854880

855881
project2 = _IntersphinxProject(
856882
name='2', target_uri=url2, locations=(url2, None)
857883
)
858884
with mock.patch(
859-
'sphinx.ext.intersphinx._load._fetch_inventory', side_effect=side_effect
885+
'sphinx.ext.intersphinx._load._fetch_inventory_data',
886+
side_effect=side_effect,
860887
) as mockfn:
861888
assert not _fetch_inventory_group(project=project2, **kwds)
862889
mockfn.assert_any_call(
863890
target_uri=url2,
864891
inv_location=url2,
865892
config=config,
866893
srcdir=None,
894+
cache_path=None,
867895
)
868896
mockfn.assert_any_call(
869897
target_uri=url2,
870898
inv_location=url2 + INVENTORY_FILENAME,
871899
config=config,
872900
srcdir=None,
901+
cache_path=None,
873902
)

tests/test_extensions/test_ext_intersphinx_cache.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import posixpath
66
import re
7+
import shutil
78
import zlib
89
from http.server import BaseHTTPRequestHandler
910
from io import BytesIO
@@ -261,12 +262,14 @@ def test_load_mappings_cache_update(tmp_path):
261262
app1 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides1)
262263
app1.build()
263264
app1.cleanup()
265+
shutil.rmtree(app1.doctreedir / '__intersphinx_cache__', ignore_errors=True)
264266

265267
# switch to new url and assert that the old URL is no more stored
266268
confoverrides2 = BASE_CONFIG | {'intersphinx_mapping': new_project.record}
267269
app2 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides2)
268270
app2.build()
269271
app2.cleanup()
272+
shutil.rmtree(app2.doctreedir / '__intersphinx_cache__', ignore_errors=True)
270273

271274
entry = new_project.make_entry()
272275
item = dict((new_project.normalise(entry),))

0 commit comments

Comments
 (0)