From 8df2dfa1d79f0a710168da7956722982b50858b5 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 15:02:51 +0200
Subject: [PATCH 01/14] ENH : Process /uniHHHH for text_extract

/uniHHHH glyphs seem to be generated by LaTeX but is OK for other characters
addressed partially in #2016
---
 pypdf/_cmap.py           | 15 ++++++++++++---
 tests/test_cmap.py       | 10 ++++++++++
 tests/test_encryption.py | 22 +++++++++++++++++-----
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index 14c1e229c..788a1ac19 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -461,10 +461,19 @@ def type1_alternative(
                 continue
             try:
                 i = int(words[1])
-                v = adobe_glyphs[words[2].decode()]
-            except (ValueError, KeyError):
+            except ValueError:
                 continue
-            if v == " ":
+            try:
+                v = adobe_glyphs[words[2].decode()]
+            except KeyError:
+                if words[2].startswith(b"/uni"):
+                    try:
+                        v = chr(int(words[2][4:], 16))
+                    except ValueError:
+                        continue
+                else:
+                    continue
+            if words[2].decode() == b" ":
                 space_code = i
             map_dict[chr(i)] = v
             int_entry.append(i)
diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index f74da326d..6e7448651 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -179,3 +179,13 @@ def test_latex():
     for pat in ("α", "β", "γ", "ϕ", "φ", "ℏ", "∫", "∂", "·", "×"):
         assert pat in txt
     # actually the ϕ and φ seems to be crossed in latex
+
+
+@pytest.mark.enable_socket()
+def test_unixxx_glyphs():
+    url = "https://arxiv.org/pdf/2201.00021.pdf"
+    name = "unixxx_glyphs.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    txt = reader.pages[0].extract_text()  # no error
+    for pat in ("闫耀庭", "龚龑", "张江水", "1′′.2"):
+        assert pat in txt
diff --git a/tests/test_encryption.py b/tests/test_encryption.py
index ff33d2121..6641977a4 100644
--- a/tests/test_encryption.py
+++ b/tests/test_encryption.py
@@ -125,7 +125,10 @@ def test_encryption(name, requires_pycryptodome):
         ("r6-both-passwords.pdf", "foo", "bar"),
     ],
 )
-@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography")
+@pytest.mark.skipif(
+    not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY,
+    reason="No pycryptodome / cryptography",
+)
 def test_pdf_with_both_passwords(name, user_passwd, owner_passwd):
     """
     PDFs with both user and owner passwords are handled correctly.
@@ -151,7 +154,10 @@ def test_pdf_with_both_passwords(name, user_passwd, owner_passwd):
         ("crazyones-encrypted-256.pdf", b"password"),
     ],
 )
-@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography")
+@pytest.mark.skipif(
+    not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY,
+    reason="No pycryptodome / cryptography",
+)
 def test_read_page_from_encrypted_file_aes_256(pdffile, password):
     """
     A page can be read from an encrypted.
@@ -176,7 +182,10 @@ def test_read_page_from_encrypted_file_aes_256(pdffile, password):
         ),
     ],
 )
-@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography")
+@pytest.mark.skipif(
+    not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY,
+    reason="No pycryptodome / cryptography",
+)
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
 def test_merge_encrypted_pdfs(names):
     """Encrypted PDFs can be merged after decryption."""
@@ -193,7 +202,7 @@ def test_merge_encrypted_pdfs(names):
 
 @pytest.mark.skipif(
     HAS_CRYPTOGRAPHY,
-    reason="Limitations of cryptography. see https://github.com/pyca/cryptography/issues/2494"
+    reason="Limitations of cryptography. see https://github.com/pyca/cryptography/issues/2494",
 )
 @pytest.mark.parametrize(
     "cryptcls",
@@ -346,7 +355,10 @@ def test_pdf_encrypt_multiple(pdf_file_path, count):
     assert text0 == text1
 
 
-@pytest.mark.skipif(not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY, reason="No pycryptodome / cryptography")
+@pytest.mark.skipif(
+    not HAS_PYCRYPTODOME and not HAS_CRYPTOGRAPHY,
+    reason="No pycryptodome / cryptography",
+)
 def test_aes_decrypt_corrupted_data():
     """Just for robustness"""
     aes = CryptAES(secrets.token_bytes(16))

From 21af042a3fe6d57fd677af3162dccc169d68a536 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 15:35:13 +0200
Subject: [PATCH 02/14] coverage

---
 pypdf/_cmap.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index 788a1ac19..6400f89b1 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -461,7 +461,7 @@ def type1_alternative(
                 continue
             try:
                 i = int(words[1])
-            except ValueError:
+            except ValueError:  # pragma: no cover
                 continue
             try:
                 v = adobe_glyphs[words[2].decode()]
@@ -469,7 +469,7 @@ def type1_alternative(
                 if words[2].startswith(b"/uni"):
                     try:
                         v = chr(int(words[2][4:], 16))
-                    except ValueError:
+                    except ValueError:  # pragma: no cover
                         continue
                 else:
                     continue

From 1b78427de2c19c2151b38ff3e6e47cb29c6ead93 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 21:22:56 +0200
Subject: [PATCH 03/14] BUG : writing German characters into fields

closes #2035
closes #2021
---
 pypdf/_cmap.py       | 29 ++++++++++++++----
 pypdf/_writer.py     | 70 ++++++++++++++++++++++++++++++++++++--------
 tests/test_writer.py | 23 +++++++++++++++
 3 files changed, 104 insertions(+), 18 deletions(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index 6400f89b1..b5311c5fb 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -17,15 +17,35 @@ def build_char_map(
     Determine information about a font.
 
     Args:
-        font_name:
-        space_width:
-        obj:
+        font_name: font name as a string
+        space_width: default space with if no data found.
+        obj: XObject or Page where you can find a /Resource dictionary
 
     Returns:
-        Font sub-type, space_width/2, encoding, map character-map, font-dictionary.
+        Font sub-type, space_width criteria(50% of width), encoding, map character-map, font-dictionary.
         The font-dictionary itself is suitable for the curious.
     """
     ft: DictionaryObject = obj["/Resources"]["/Font"][font_name]  # type: ignore
+    font_subtype, font_halfspace, font_encoding, font_map = build_char_map_from_dict(
+        space_width, ft
+    )
+    return font_subtype, font_halfspace, font_encoding, font_map, ft
+
+
+def build_char_map_from_dict(
+    space_width: float, ft: DictionaryObject
+) -> Tuple[str, float, Union[str, Dict[int, str]], Dict]:
+    """
+    Determine information about a font.
+
+    Args:
+        space_width: default space with if no data found (normally half width of char.
+        ft: Font Dictionary
+
+    Returns:
+        Font sub-type, space_width criteria(50% of width), encoding, map character-map.
+        The font-dictionary itself is suitable for the curious.
+    """
     font_type: str = cast(str, ft["/Subtype"])
 
     space_code = 32
@@ -73,7 +93,6 @@ def build_char_map(
         encoding,
         # https://github.com/python/mypy/issues/4374
         map_dict,
-        ft,
     )
 
 
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index ec4896894..7054fb62a 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -54,6 +54,7 @@
     cast,
 )
 
+from ._cmap import build_char_map_from_dict
 from ._encryption import EncryptAlgorithm, Encryption
 from ._page import PageObject, _VirtualList
 from ._page_labels import nums_clear_range, nums_insert, nums_next
@@ -847,6 +848,47 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             da = " ".join(font_properties)
         y_offset = rct.height - 1 - font_height
 
+        # Retrieve font information from local DR ...
+        dr: Any = cast(dict, cast(DictionaryObject, field.get("/DR", {})))
+        if isinstance(dr, IndirectObject):
+            dr = dr.get_object()
+        dr = dr.get("/Font", {})
+        if isinstance(dr, IndirectObject):
+            dr = dr.get_object()
+        if font_name not in dr:
+            # ...or AcroForm dictionary
+            dr = cast(
+                dict,
+                cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}),
+            )
+            if isinstance(dr, IndirectObject):
+                dr = dr.get_object()
+            dr = dr.get("/Font", {})
+            if isinstance(dr, IndirectObject):
+                dr = dr.get_object()
+        font_res = dr.get(font_name)
+        if font_res is not None:
+            font_res = cast(DictionaryObject, font_res.get_object())
+            font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
+                200, font_res
+            )
+            font_full_rev: dict[str, int]
+            if isinstance(font_encoding, str):
+                if font_encoding not in ("charmap", "utf-16-be"):
+                    logger_warning(
+                        f"unexpected {font_encoding} : please share pdf with pypdf dev team",
+                        __name__,
+                    )
+                font_full_rev = {v: k for k, v in font_map.items()}
+            else:
+                font_full_rev = {v: k for k, v in font_encoding.items()}
+                font_encoding_rev = {v: k for k, v in font_encoding.items()}
+                for k, v in font_map.items():
+                    font_full_rev[v] = font_encoding_rev.get(k, ord(k))
+        else:
+            logger_warning(f"can not find font dictionnary for {font_name}", __name__)
+            font_full_rev = {}
+
         # Retrieve field text and selected values
         field_flags = field.get(FA.Ff, 0)
         if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
@@ -872,7 +914,15 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             else:
                 # Td is a relative translation
                 ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
-            ap_stream += b"(" + str(line).encode("UTF-8") + b") Tj\n"
+            enc_line: list[Any] = [font_full_rev.get(c, ord(c)) for c in line]
+            if all(c > 255 for c in enc_line):
+                ap_stream += (
+                    b"<"
+                    + b"".join(b"%04X" % x for x in line.encode("UTF-16-BE"))
+                    + b"> Tj\n"
+                )
+            else:
+                ap_stream += b"(" + bytes(enc_line) + b") Tj\n"
         ap_stream += b"ET\nQ\nEMC\nQ\n"
 
         # Create appearance dictionary
@@ -886,22 +936,16 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             }
         )
 
-        # Retrieve font information from AcroForm dictionary
-        dr: Any = cast(
-            dict, cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {})
-        )
-        if isinstance(dr, IndirectObject):
-            dr = dr.get_object()
-        dr = dr.get("/Font", {})
-        if isinstance(dr, IndirectObject):
-            dr = dr.get_object()
-
         # Update Resources with font information if necessary
-        if font_name in dr:
+        if font_res is not None:
             dct[NameObject("/Resources")] = DictionaryObject(
                 {
                     NameObject("/Font"): DictionaryObject(
-                        {NameObject(font_name): dr[font_name].indirect_reference}
+                        {
+                            NameObject(font_name): getattr(
+                                font_res, "indirect_reference", font_res
+                            )
+                        }
                     )
                 }
             )
diff --git a/tests/test_writer.py b/tests/test_writer.py
index ebeaf60e0..5c1dd51d3 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1582,3 +1582,26 @@ def test_missing_fields(pdf_file_path):
             writer.pages[0], {"foo": "some filled in text"}, flags=1
         )
     assert exc.value.args[0] == "No /Fields dictionary in Pdf in PdfWriter Object"
+
+
+@pytest.mark.enable_socket()
+def test_germanfields():
+    """Cf #2035"""
+    url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf"
+    name = "germanfields.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    writer = PdfWriter(clone_from=reader)
+    form_fields = {"Text Box 1": "test æ ø å"}
+    writer.update_page_form_field_values(
+        writer.pages[0], form_fields, auto_regenerate=False
+    )
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert (
+        b"test \xe6 \xf8 \xe5"
+        in reader2.get_fields()["Text Box 1"]
+        .indirect_reference.get_object()["/AP"]["/N"]
+        .get_data()
+    )

From 284da98a007d27d02b5fefdbee2c99cfd6bd0a1f Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 21:41:37 +0200
Subject: [PATCH 04/14] mypy

---
 pypdf/_writer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 7054fb62a..f269ebdf3 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -872,7 +872,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
                 200, font_res
             )
-            font_full_rev: dict[str, int]
+            font_full_rev: Dict[str, int]
             if isinstance(font_encoding, str):
                 if font_encoding not in ("charmap", "utf-16-be"):
                     logger_warning(
@@ -914,7 +914,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             else:
                 # Td is a relative translation
                 ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
-            enc_line: list[Any] = [font_full_rev.get(c, ord(c)) for c in line]
+            enc_line: List[Any] = [font_full_rev.get(c, ord(c)) for c in line]
             if all(c > 255 for c in enc_line):
                 ap_stream += (
                     b"<"

From 807212b9f2f40d79caf562d7420b80502ff56634 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 22:40:14 +0200
Subject: [PATCH 05/14] Update pypdf/_cmap.py

Co-authored-by: Martin Thoma <info@martin-thoma.de>
---
 pypdf/_cmap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index b5311c5fb..b9ab31941 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -18,7 +18,7 @@ def build_char_map(
 
     Args:
         font_name: font name as a string
-        space_width: default space with if no data found.
+        space_width: default space width if no data is found.
         obj: XObject or Page where you can find a /Resource dictionary
 
     Returns:

From 3ef7e337f527f3e0a1c748f3c6185b6235efb6c1 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 22:41:29 +0200
Subject: [PATCH 06/14] Update pypdf/_cmap.py

Co-authored-by: Martin Thoma <info@martin-thoma.de>
---
 pypdf/_cmap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index b9ab31941..f7c03a100 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -22,7 +22,7 @@ def build_char_map(
         obj: XObject or Page where you can find a /Resource dictionary
 
     Returns:
-        Font sub-type, space_width criteria(50% of width), encoding, map character-map, font-dictionary.
+        Font sub-type, space_width criteria (50% of width), encoding, map character-map, font-dictionary.
         The font-dictionary itself is suitable for the curious.
     """
     ft: DictionaryObject = obj["/Resources"]["/Font"][font_name]  # type: ignore

From 4fc16b6907cd0b163a1dc01c51b990089a9ebb58 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 23:08:56 +0200
Subject: [PATCH 07/14] coverage

---
 pypdf/_writer.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index f269ebdf3..0e345888d 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -850,11 +850,9 @@ def _update_text_field(self, field: DictionaryObject) -> None:
 
         # Retrieve font information from local DR ...
         dr: Any = cast(dict, cast(DictionaryObject, field.get("/DR", {})))
-        if isinstance(dr, IndirectObject):
-            dr = dr.get_object()
-        dr = dr.get("/Font", {})
-        if isinstance(dr, IndirectObject):
+        if isinstance(dr, IndirectObject):  # pragma: no cover
             dr = dr.get_object()
+        dr = dr.get("/Font", {}).get_object()
         if font_name not in dr:
             # ...or AcroForm dictionary
             dr = cast(
@@ -863,9 +861,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             )
             if isinstance(dr, IndirectObject):
                 dr = dr.get_object()
-            dr = dr.get("/Font", {})
-            if isinstance(dr, IndirectObject):
-                dr = dr.get_object()
+            dr = dr.get("/Font", {}).get_object()
         font_res = dr.get(font_name)
         if font_res is not None:
             font_res = cast(DictionaryObject, font_res.get_object())
@@ -874,6 +870,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             )
             font_full_rev: Dict[str, int]
             if isinstance(font_encoding, str):
+                assert font_encoding in ("charmap", "utf-16-be")
                 if font_encoding not in ("charmap", "utf-16-be"):
                     logger_warning(
                         f"unexpected {font_encoding} : please share pdf with pypdf dev team",
@@ -886,7 +883,8 @@ def _update_text_field(self, field: DictionaryObject) -> None:
                 for k, v in font_map.items():
                     font_full_rev[v] = font_encoding_rev.get(k, ord(k))
         else:
-            logger_warning(f"can not find font dictionnary for {font_name}", __name__)
+            raise AssertionError("can not find font dictionary")
+            logger_warning(f"can not find font dictionary for {font_name}", __name__)
             font_full_rev = {}
 
         # Retrieve field text and selected values
@@ -915,7 +913,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
                 # Td is a relative translation
                 ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
             enc_line: List[Any] = [font_full_rev.get(c, ord(c)) for c in line]
-            if all(c > 255 for c in enc_line):
+            if any(c > 255 for c in enc_line):
                 ap_stream += (
                     b"<"
                     + b"".join(b"%04X" % x for x in line.encode("UTF-16-BE"))

From 883f4396573f3761629a5255e0185463c02417c1 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 30 Jul 2023 23:32:38 +0200
Subject: [PATCH 08/14] fix

---
 pypdf/_writer.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 0e345888d..d817d1600 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -849,10 +849,11 @@ def _update_text_field(self, field: DictionaryObject) -> None:
         y_offset = rct.height - 1 - font_height
 
         # Retrieve font information from local DR ...
-        dr: Any = cast(dict, cast(DictionaryObject, field.get("/DR", {})))
-        if isinstance(dr, IndirectObject):  # pragma: no cover
-            dr = dr.get_object()
-        dr = dr.get("/Font", {}).get_object()
+        dr: Any = cast(
+            DictionaryObject,
+            cast(DictionaryObject, field.get("/DR", DictionaryObject())).get_object(),
+        )
+        dr = dr.get("/Font", DictionaryObject()).get_object()
         if font_name not in dr:
             # ...or AcroForm dictionary
             dr = cast(
@@ -861,7 +862,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             )
             if isinstance(dr, IndirectObject):
                 dr = dr.get_object()
-            dr = dr.get("/Font", {}).get_object()
+            dr = dr.get("/Font", DictionaryObject()).get_object()
         font_res = dr.get(font_name)
         if font_res is not None:
             font_res = cast(DictionaryObject, font_res.get_object())

From c0fd10c634fe6dfa845d926f2cccd195d23d470e Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 4 Aug 2023 15:40:24 +0200
Subject: [PATCH 09/14] fix some cases with utf-16

---
 pypdf/_writer.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 8958e0b6f..8e3788ee3 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -869,20 +869,20 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
                 200, font_res
             )
-            font_full_rev: Dict[str, int]
+            try:  # get rid of width stored in -1 key
+                del font_map[-1]
+            except KeyError:
+                pass
+            font_full_rev: Dict[str, bytes]
             if isinstance(font_encoding, str):
-                assert font_encoding in ("charmap", "utf-16-be")
-                if font_encoding not in ("charmap", "utf-16-be"):
-                    logger_warning(
-                        f"unexpected {font_encoding} : please share pdf with pypdf dev team",
-                        __name__,
-                    )
-                font_full_rev = {v: k for k, v in font_map.items()}
+                font_full_rev = {
+                    v: k.encode(font_encoding) for k, v in font_map.items()
+                }
             else:
-                font_full_rev = {v: k for k, v in font_encoding.items()}
-                font_encoding_rev = {v: k for k, v in font_encoding.items()}
-                for k, v in font_map.items():
-                    font_full_rev[v] = font_encoding_rev.get(k, ord(k))
+                font_full_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
+                font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()}
+                for kk, v in font_map.items():
+                    font_full_rev[v] = font_encoding_rev.get(kk, kk)
         else:
             raise AssertionError("can not find font dictionary")
             logger_warning(f"can not find font dictionary for {font_name}", __name__)
@@ -913,15 +913,13 @@ def _update_text_field(self, field: DictionaryObject) -> None:
             else:
                 # Td is a relative translation
                 ap_stream += f"0 {- font_height * 1.4} Td\n".encode()
-            enc_line: List[Any] = [font_full_rev.get(c, ord(c)) for c in line]
-            if any(c > 255 for c in enc_line):
-                ap_stream += (
-                    b"<"
-                    + b"".join(b"%04X" % x for x in line.encode("UTF-16-BE"))
-                    + b"> Tj\n"
-                )
+            enc_line: List[bytes] = [
+                font_full_rev.get(c, c.encode("utf-16-be")) for c in line
+            ]
+            if any(len(c) >= 2 for c in enc_line):
+                ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
             else:
-                ap_stream += b"(" + bytes(enc_line) + b") Tj\n"
+                ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
         ap_stream += b"ET\nQ\nEMC\nQ\n"
 
         # Create appearance dictionary

From 2b2b1cd2e8ea24cb29c0de01f4c509939f8c9678 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 1 Aug 2023 19:03:21 +0200
Subject: [PATCH 10/14] ENH : allow to change font name and size in fields
 update

---
 pypdf/_writer.py | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 8e3788ee3..5a73cee07 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -832,7 +832,9 @@ def _get_qualified_field_name(self, parent: DictionaryObject) -> Optional[str]:
                 return qualified_parent + "." + cast(str, parent["/T"])
         return cast(str, parent["/T"])
 
-    def _update_text_field(self, field: DictionaryObject) -> None:
+    def _update_text_field(
+        self, field: DictionaryObject, fontname: str = "", fontsize: float = -1
+    ) -> None:
         # Calculate rectangle dimensions
         _rct = cast(RectangleObject, field[AA.Rect])
         rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1]))
@@ -840,10 +842,19 @@ def _update_text_field(self, field: DictionaryObject) -> None:
         # Extract font information
         da = cast(str, field[AA.DA])
         font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
-        font_name = font_properties[font_properties.index("Tf") - 2]
-        font_height = float(font_properties[font_properties.index("Tf") - 1])
-        if font_height == 0:
-            font_height = rct.height - 2
+        font_name = (
+            fontname if fontname else font_properties[font_properties.index("Tf") - 2]
+        )
+        font_height = (
+            fontsize
+            if fontsize >= 0
+            else float(font_properties[font_properties.index("Tf") - 1])
+        )
+        if fontname or fontsize >= 0 or font_height == 0:
+            if fontname:
+                font_properties[font_properties.index("Tf") - 1] = fontname
+            if font_height == 0:
+                font_height = rct.height - 2
             font_properties[font_properties.index("Tf") - 1] = str(font_height)
             da = " ".join(font_properties)
         y_offset = rct.height - 1 - font_height
@@ -975,8 +986,14 @@ def update_page_form_field_values(
         Args:
             page: Page reference from PDF writer where the
                 annotations and field data will be updated.
-            fields: a Python dictionary of field names (/T) and text
-                values (/V)
+            fields: a Python dictionary of :
+                a) field names (/T) as keys and  text values (/V) as value
+                b) field names (/T) as keys and  list of text values (/V)
+                       for multiple choice list
+                c) field names (/T) as keys and  tuple of :
+                       * text values (/V)
+                       * font name (must exist)
+                       * font size (0 for autosize)
             flags: An integer (0 to 7). The first bit sets ReadOnly, the
                 second bit sets Required, the third bit sets NoExport. See
                 PDF Reference Table 8.70 for details.
@@ -1012,6 +1029,10 @@ def update_page_form_field_values(
                     if isinstance(value, list):
                         lst = ArrayObject(TextStringObject(v) for v in value)
                         writer_annot[NameObject(FA.V)] = lst
+                    elif isinstance(value, tuple):
+                        writer_annot[NameObject(FA.V)] = TextStringObject(
+                            value[0],
+                        )
                     else:
                         writer_annot[NameObject(FA.V)] = TextStringObject(value)
                     if writer_annot.get(FA.FT) in ("/Btn"):
@@ -1033,7 +1054,10 @@ def update_page_form_field_values(
                                 if AA.DA in f:
                                     da = f[AA.DA]
                             writer_annot[NameObject(AA.DA)] = da
-                        self._update_text_field(writer_annot)
+                        if isinstance(value, tuple):
+                            self._update_text_field(writer_annot, value[1], value[2])
+                        else:
+                            self._update_text_field(writer_annot)
                     elif writer_annot.get(FA.FT) == "/Sig":
                         # signature
                         logger_warning("Signature forms not implemented yet", __name__)

From 532f015395c938726877ff175dcbb43044b49444 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 4 Aug 2023 16:52:37 +0200
Subject: [PATCH 11/14] Update pypdf/_cmap.py

Co-authored-by: Martin Thoma <info@martin-thoma.de>
---
 pypdf/_cmap.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
index f7c03a100..04e6c2611 100644
--- a/pypdf/_cmap.py
+++ b/pypdf/_cmap.py
@@ -39,7 +39,8 @@ def build_char_map_from_dict(
     Determine information about a font.
 
     Args:
-        space_width: default space with if no data found (normally half width of char.
+        space_width: default space with if no data found
+             (normally half the width of a character).
         ft: Font Dictionary
 
     Returns:

From 7306998c2ff2597ffeeeae487f5153241a35aae2 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 4 Aug 2023 16:59:17 +0200
Subject: [PATCH 12/14] coverage

---
 pypdf/_writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 8e3788ee3..c20ff665c 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -860,7 +860,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
                 dict,
                 cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}),
             )
-            if isinstance(dr, IndirectObject):
+            if isinstance(dr, IndirectObject):  # pragma: no cover
                 dr = dr.get_object()
             dr = dr.get("/Font", DictionaryObject()).get_object()
         font_res = dr.get(font_name)

From 6e23da5ee4ee200d3be081c36586fbcc2a013b04 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 4 Aug 2023 23:11:18 +0200
Subject: [PATCH 13/14] fix + test

---
 pypdf/_writer.py     |  7 +++++--
 tests/test_writer.py | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 5a73cee07..3eb73cae3 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -852,7 +852,7 @@ def _update_text_field(
         )
         if fontname or fontsize >= 0 or font_height == 0:
             if fontname:
-                font_properties[font_properties.index("Tf") - 1] = fontname
+                font_properties[font_properties.index("Tf") - 2] = fontname
             if font_height == 0:
                 font_height = rct.height - 2
             font_properties[font_properties.index("Tf") - 1] = str(font_height)
@@ -930,7 +930,10 @@ def _update_text_field(
             if any(len(c) >= 2 for c in enc_line):
                 ap_stream += b"<" + (b"".join(enc_line)).hex().encode() + b"> Tj\n"
             else:
-                ap_stream += b"(" + b"".join(enc_line) + b") Tj\n"
+                enc = b"".join(enc_line)
+                # for x in range(32):
+                #    enc = enc.replace(bytes((x,)),b"\%03o"%x)
+                ap_stream += b"(" + enc + b") Tj\n"
         ap_stream += b"ET\nQ\nEMC\nQ\n"
 
         # Create appearance dictionary
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 88f884e4a..7713d9b7c 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1612,3 +1612,20 @@ def test_germanfields():
         .indirect_reference.get_object()["/AP"]["/N"]
         .get_data()
     )
+
+
+def test_selfont():
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
+    writer.update_page_form_field_values(
+        writer.pages[0],
+        {"Text1": ("Text", "", 5), "Text2": ("Text", "/F1", 15)},
+        auto_regenerate=False,
+    )
+    assert (
+        b"/F3 5 Tf"
+        in writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"].get_data()
+    )
+    assert (
+        b"/F1 15 Tf"
+        in writer.pages[0]["/Annots"][2].get_object()["/AP"]["/N"].get_data()
+    )

From defdcd47ddf319b3e4b0e332d3bad9114f496827 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 6 Aug 2023 12:51:51 +0200
Subject: [PATCH 14/14] simplify test

---
 pypdf/_writer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 6001942ea..c1dfb3aaf 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -895,7 +895,6 @@ def _update_text_field(
                 for kk, v in font_map.items():
                     font_full_rev[v] = font_encoding_rev.get(kk, kk)
         else:
-            raise AssertionError("can not find font dictionary")
             logger_warning(f"can not find font dictionary for {font_name}", __name__)
             font_full_rev = {}