Skip to content

Commit b8dd6f3

Browse files
authored
[mypyc] Add support for C string literals in the IR (#19383)
Previously only Python str and bytes literals were supported, but sometimes we want zero-terminated C string literals instead. They don't need to be allocated from the heap and are usually stored in a read-only data section, so they are more efficient in some use cases. These will be useful for a feature I'm working on.
1 parent fa5d942 commit b8dd6f3

File tree

5 files changed

+80
-4
lines changed

5 files changed

+80
-4
lines changed

mypyc/codegen/emitfunc.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
Cast,
3434
ComparisonOp,
3535
ControlOp,
36+
CString,
3637
DecRef,
3738
Extend,
3839
Float,
@@ -850,6 +851,8 @@ def reg(self, reg: Value) -> str:
850851
elif r == "nan":
851852
return "NAN"
852853
return r
854+
elif isinstance(reg, CString):
855+
return '"' + encode_c_string_literal(reg.value) + '"'
853856
else:
854857
return self.emitter.reg(reg)
855858

@@ -911,3 +914,30 @@ def emit_unsigned_int_cast(self, type: RType) -> str:
911914
return "(uint64_t)"
912915
else:
913916
return ""
917+
918+
919+
# Lazily built map from byte value (0-255) to its C string literal spelling.
_translation_table: Final[dict[int, str]] = {}


def encode_c_string_literal(b: bytes) -> str:
    r"""Convert bytestring to the C string literal syntax (with necessary escaping).

    For example, b'foo\n' gets converted to 'foo\\n'. The surrounding double
    quotes are not added; the result is meant to be placed between them.

    Newline, carriage return, tab, double quote and backslash use their
    simple escape sequences. Other bytes below 32 or at/above 127 are written
    as two-digit hex escapes, and the remaining bytes are emitted as is.

    A C hex escape consumes every hex digit that follows it, so when a
    hex-escaped byte is directly followed by a literal hex digit character,
    an empty pair of double quotes is inserted between them. Once the caller
    wraps the result in double quotes this splits it into adjacent string
    literals, which the C compiler concatenates after escapes are decoded.

    Args:
        b: The raw bytes of the string (may be empty).

    Returns:
        The escaped text to put between double quotes in C source.
    """
    if not _translation_table:
        # Initialize the translation table on the first call.
        d = {
            ord("\n"): "\\n",
            ord("\r"): "\\r",
            ord("\t"): "\\t",
            ord('"'): '\\"',
            ord("\\"): "\\\\",
        }
        for i in range(256):
            if i not in d:
                d[i] = chr(i) if 32 <= i < 127 else "\\x%.2x" % i
        _translation_table.update(str.maketrans(d))

    # latin1 maps each byte to the code point with the same value, so the
    # table can be applied to the decoded string in a single pass.
    encoded = b.decode("latin1").translate(_translation_table)
    if "\\x" not in encoded:
        # Fast path: no hex escape can be present, so nothing can be swallowed.
        return encoded

    # Slow path: re-encode byte by byte so that a hex escape is never
    # immediately followed by a character C would parse as part of it.
    hex_digits = b"0123456789abcdefABCDEF"
    pieces: list[str] = []
    after_hex_escape = False
    for byte in b:
        escaped = _translation_table[byte]
        if after_hex_escape and byte in hex_digits:
            # Terminate the current literal and start a new one.
            pieces.append('""')
        pieces.append(escaped)
        # An escaped backslash is two backslashes, so only real hex
        # escapes start with backslash + "x".
        after_hex_escape = escaped.startswith("\\x")
    return "".join(pieces)

mypyc/ir/ops.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class to enable the new behavior. Sometimes adding a new abstract
3939
RVoid,
4040
bit_rprimitive,
4141
bool_rprimitive,
42+
cstring_rprimitive,
4243
float_rprimitive,
4344
int_rprimitive,
4445
is_bit_rprimitive,
@@ -230,6 +231,20 @@ def __init__(self, value: float, line: int = -1) -> None:
230231
self.line = line
231232

232233

234+
@final
class CString(Value):
    """An IR value holding a zero-terminated C string literal.

    The bytes may contain embedded zero values, but in that case the
    length of the string has to be tracked separately.
    """

    def __init__(self, value: bytes, line: int = -1) -> None:
        # The three attributes are independent of each other.
        self.line = line
        self.type = cstring_rprimitive
        self.value = value
246+
247+
233248
class Op(Value):
234249
"""Abstract base class for all IR operations.
235250

mypyc/ir/pprint.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Cast,
2222
ComparisonOp,
2323
ControlOp,
24+
CString,
2425
DecRef,
2526
Extend,
2627
Float,
@@ -327,6 +328,8 @@ def format(self, fmt: str, *args: Any) -> str:
327328
result.append(str(arg.value))
328329
elif isinstance(arg, Float):
329330
result.append(repr(arg.value))
331+
elif isinstance(arg, CString):
332+
result.append(f"CString({arg.value!r})")
330333
else:
331334
result.append(self.names[arg])
332335
elif typespec == "d":

mypyc/ir/rtypes.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,13 +254,11 @@ def __init__(
254254
elif ctype == "CPyPtr":
255255
# TODO: Invent an overlapping error value?
256256
self.c_undefined = "0"
257-
elif ctype == "PyObject *":
258-
# Boxed types use the null pointer as the error value.
257+
elif ctype.endswith("*"):
258+
# Boxed and pointer types use the null pointer as the error value.
259259
self.c_undefined = "NULL"
260260
elif ctype == "char":
261261
self.c_undefined = "2"
262-
elif ctype in ("PyObject **", "void *"):
263-
self.c_undefined = "NULL"
264262
elif ctype == "double":
265263
self.c_undefined = "-113.0"
266264
elif ctype in ("uint8_t", "uint16_t", "uint32_t", "uint64_t"):
@@ -445,6 +443,10 @@ def __hash__(self) -> int:
445443
"c_ptr", is_unboxed=False, is_refcounted=False, ctype="void *"
446444
)
447445

446+
# Primitive type with C type "const char *"; declared unboxed and not reference counted.
cstring_rprimitive: Final = RPrimitive(
447+
"cstring", is_unboxed=True, is_refcounted=False, ctype="const char *"
448+
)
449+
448450
# The type corresponding to mypyc.common.BITMAP_TYPE
449451
bitmap_rprimitive: Final = uint32_rprimitive
450452

mypyc/test/test_emitfunc.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
CallC,
2020
Cast,
2121
ComparisonOp,
22+
CString,
2223
DecRef,
2324
Extend,
2425
GetAttr,
@@ -49,6 +50,7 @@
4950
RType,
5051
bool_rprimitive,
5152
c_int_rprimitive,
53+
cstring_rprimitive,
5254
dict_rprimitive,
5355
int32_rprimitive,
5456
int64_rprimitive,
@@ -836,6 +838,30 @@ def test_inc_ref_int_literal(self) -> None:
836838
b = LoadLiteral(x, object_rprimitive)
837839
self.assert_emit([b, IncRef(b)], "CPy_INCREF(cpy_r_r0);")
838840

841+
def test_c_string(self) -> None:
    """Check that CString values are emitted as escaped C string literals."""
    s = Register(cstring_rprimitive, "s")

    # A few hand-written cases, checked in order.
    fixed_cases = [
        (b"foo", """cpy_r_s = "foo";"""),
        (b'foo "o', r"""cpy_r_s = "foo \"o";"""),
        (b"\x00", r"""cpy_r_s = "\x00";"""),
        (b"\\", r"""cpy_r_s = "\\";"""),
    ]
    for raw, expected in fixed_cases:
        self.assert_emit(Assign(s, CString(raw)), expected)

    # Every possible single-byte string: bytes with a simple escape first,
    # then printable ASCII as is, everything else as a two-digit hex escape.
    simple_escapes = {
        ord("\n"): "\\n",
        ord("\r"): "\\r",
        ord("\t"): "\\t",
        ord('"'): '\\"',
        ord("\\"): "\\\\",
    }
    for code in range(256):
        if code in simple_escapes:
            target = simple_escapes[code]
        elif 32 <= code < 127:
            target = chr(code)
        else:
            target = "\\x%.2x" % code
        self.assert_emit(Assign(s, CString(bytes([code]))), f'cpy_r_s = "{target}";')
864+
839865
def assert_emit(
840866
self,
841867
op: Op | list[Op],

0 commit comments

Comments
 (0)