Skip to content

Commit ffd406a

Browse files
ROB: Gracefully handle odd-length strings in parse_bfchar (#3348)
Closes #3347.
1 parent dfadde5 commit ffd406a

File tree

2 files changed

+19
-4
lines changed

2 files changed

+19
-4
lines changed

pypdf/_cmap.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import binascii
2+
from binascii import Error as BinasciiError
23
from binascii import unhexlify
34
from math import ceil
45
from typing import Any, Dict, List, Tuple, Union, cast
@@ -383,9 +384,12 @@ def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) ->
383384
map_to = ""
384385
# placeholder (see above) means empty string
385386
if lst[1] != b".":
386-
map_to = unhexlify(lst[1]).decode(
387-
"charmap" if len(lst[1]) < 4 else "utf-16-be", "surrogatepass"
388-
) # join is here as some cases where the code was split
387+
try:
388+
map_to = unhexlify(lst[1]).decode(
389+
"charmap" if len(lst[1]) < 4 else "utf-16-be", "surrogatepass"
390+
) # join is here as some cases where the code was split
391+
except BinasciiError as exception:
392+
logger_warning(f"Got invalid hex string: {exception!s} ({lst[1]!r})", __name__)
389393
map_dict[
390394
unhexlify(lst[0]).decode(
391395
"charmap" if map_dict[-1] == 1 else "utf-16-be", "surrogatepass"

tests/test_cmap.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pytest
66

77
from pypdf import PdfReader, PdfWriter
8-
from pypdf._cmap import build_char_map, get_encoding
8+
from pypdf._cmap import build_char_map, get_encoding, parse_bfchar
99
from pypdf._codecs import charset_encoding
1010
from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject, NameObject, NullObject
1111

@@ -327,3 +327,14 @@ def test_get_encoding__encoding_value_is_none():
327327
dict(zip(range(256), charset_encoding["/StandardEncoding"])),
328328
{}
329329
)
330+
331+
332+
def test_parse_bfchar(caplog):
    """Check ``parse_bfchar`` on a valid entry and on an odd-length hex target.

    Two bfchar lines are parsed into the same mapping:

    * ``057e 1337`` has a well-formed target and is expected to map
      "վ" to "ጷ".
    * ``056e 1f310`` has a five-digit (odd-length) hex target; it is expected
      to produce a warning and leave the source character "ծ" mapped to an
      empty string.
    """
    char_map = {}
    source_codes = []
    for bfchar_line in (b"057e 1337", b"056e 1f310"):
        parse_bfchar(line=bfchar_line, map_dict=char_map, int_entry=source_codes)

    # Both source codes are recorded (0x057e == 1406, 0x056e == 1390), even
    # for the entry whose target could not be decoded.
    assert char_map == {-1: 2, "ծ": "", "վ": "ጷ"}
    assert source_codes == [1406, 1390]
    # Exactly one warning is expected, for the malformed target only.
    assert caplog.messages == ["Got invalid hex string: Odd-length string (b'1f310')"]

0 commit comments

Comments
 (0)