Skip to content

Commit 9c4902f

Browse files
authored
Deprecate replacing non-decodable source bytes (#13679)
1 parent 5cf62e5 commit 9c4902f

File tree

3 files changed

+17
-12
lines changed

3 files changed

+17
-12
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ Deprecated
2626
* #13665: Deprecate support for non-UTF-8 source encodings,
2727
scheduled for removal in Sphinx 10.
2828
Patch by Adam Turner.
29+
* #13679: Non-decodable bytes in source files will raise an error in Sphinx 9.
30+
Currently, such bytes are replaced with '?' and a warning is logged.
31+
Patch by Adam Turner.
2932

3033
Features added
3134
--------------

sphinx/builders/__init__.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ def read_doc(self, docname: str, *, _cache: bool = True) -> None:
642642
filename = env.doc2path(docname)
643643

644644
# set up error_handler for the target document
645+
# xref RemovedInSphinx90Warning
645646
error_handler = _UnicodeDecodeErrorHandler(docname)
646647
codecs.register_error('sphinx', error_handler) # type: ignore[arg-type]
647648

@@ -903,20 +904,21 @@ def __init__(self, docname: str, /) -> None:
903904
self.docname = docname
904905

905906
def __call__(self, error: UnicodeDecodeError) -> tuple[str, int]:
906-
line_start = error.object.rfind(b'\n', 0, error.start)
907-
line_end = error.object.find(b'\n', error.start)
907+
obj = error.object
908+
line_start = obj.rfind(b'\n', 0, error.start)
909+
line_end = obj.find(b'\n', error.start)
908910
if line_end == -1:
909-
line_end = len(error.object)
910-
line_num = error.object.count(b'\n', 0, error.start) + 1
911+
line_end = len(obj)
912+
line_num = obj.count(b'\n', 0, error.start) + 1
911913
logger.warning(
912-
__('undecodable source characters, replacing with "?": %r'),
913-
(
914-
error.object[line_start + 1 : error.start]
915-
+ b'>>>'
916-
+ error.object[error.start : error.end]
917-
+ b'<<<'
918-
+ error.object[error.end : line_end]
914+
__(
915+
"undecodable source characters, replacing with '?': '%s>>>%s<<<%s'. "
916+
'This will become an error in Sphinx 9.0.'
917+
# xref RemovedInSphinx90Warning
919918
),
919+
obj[line_start + 1 : error.start].decode(errors='backslashreplace'),
920+
obj[error.start : error.end].decode(errors='backslashreplace'),
921+
obj[error.end : line_end].decode(errors='backslashreplace'),
920922
location=(self.docname, line_num),
921923
)
922924
return '?', error.end

tests/test_builders/test_build_warnings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
{root}/index.rst:\\d+: WARNING: image file not readable: foo.png \\[image.not_readable\\]
2424
{root}/index.rst:\\d+: WARNING: download file not readable: {root}/nonexisting.png \\[download.not_readable\\]
2525
{root}/undecodable.rst:\\d+: WARNING: undecodable source characters, replacing \
26-
with "\\?": b?'here: >>>(\\\\|/)xbb<<<((\\\\|/)r)?'
26+
with '\\?': 'here: >>>(\\\\|/)xbb<<<'\\. This will become an error in Sphinx 9\\.0\\.
2727
"""
2828

2929
HTML_WARNINGS = (

0 commit comments

Comments
 (0)