python · Jahongir-Qurbonov · Jul 4, 2025 · Jul 4, 2025 · Jul 6, 2025 · Jul 6, 2025
diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h
@@ -756,6 +756,8 @@ PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors);
 Py_ssize_t CPyStr_Count(PyObject *unicode, PyObject *substring, CPyTagged start);
 Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged start, CPyTagged end);
 CPyTagged CPyStr_Ord(PyObject *obj);
+PyObject *CPyStr_Lower(PyObject *self);  // str.lower(); returns NULL on error
+PyObject *CPyStr_Upper(PyObject *self);  // str.upper(); returns NULL on error
 
 
 // Bytes operations

diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c
@@ -546,3 +546,79 @@ CPyTagged CPyStr_Ord(PyObject *obj) {
         PyExc_TypeError, "ord() expected a character, but a string of length %zd found", s);
     return CPY_INT_TAG;
 }
+
+// Implementation of the 'lower' method primitive: str.lower() semantics.
+// Only the ASCII case is handled natively; everything else defers to CPython.
+//
+// Args:
+//   self: str object to convert; the reference is not consumed.
+// Returns:
+//   A new reference to the lowercased str, or NULL with an exception set.
+PyObject *CPyStr_Lower(PyObject *self) {
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    // Fast path: ASCII only. ASCII case conversion is 1:1 and stays ASCII,
+    // so the result buffer can be sized and typed up front.
+    if (PyUnicode_IS_ASCII(self)) {
+        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+        PyObject *res = PyUnicode_New(len, 127);
+        if (res == NULL)
+            return NULL;
+        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+        Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
+        for (Py_ssize_t i = 0; i < len; i++) {
+            res_data[i] = Py_TOLOWER((unsigned char) data[i]);
+        }
+        return res;
+    }
+
+    // General Unicode path: defer to str.lower(). A per-character
+    // Py_UNICODE_TOLOWER() loop into a buffer sized from the input is wrong
+    // because full case mapping is not 1:1:
+    //   - one character can expand to several ("İ" -> "i" + U+0307),
+    //   - capital sigma lowercases to final sigma at the end of a word,
+    //   - the result can fit a narrower storage kind than the input (Kelvin
+    //     sign U+212A -> "k"), which CPython expects str objects to use.
+    // str.lower is called unbound so that, like the fast path, a str
+    // subclass overriding lower() cannot change the result.
+    return PyObject_CallMethod((PyObject *)&PyUnicode_Type, "lower", "(O)", self);
+}
+
+// Implementation of the 'upper' method primitive: str.upper() semantics.
+// Only the ASCII case is handled natively; everything else defers to CPython.
+//
+// Args:
+//   self: str object to convert; the reference is not consumed.
+// Returns:
+//   A new reference to the uppercased str, or NULL with an exception set.
+PyObject *CPyStr_Upper(PyObject *self) {
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    // Fast path: ASCII only. ASCII case conversion is 1:1 and stays ASCII,
+    // so the result buffer can be sized and typed up front.
+    if (PyUnicode_IS_ASCII(self)) {
+        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+        PyObject *res = PyUnicode_New(len, 127);
+        if (res == NULL)
+            return NULL;
+        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+        Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
+        for (Py_ssize_t i = 0; i < len; i++) {
+            res_data[i] = Py_TOUPPER((unsigned char) data[i]);
+        }
+        return res;
+    }
+
+    // General Unicode path: defer to str.upper(). A per-character
+    // Py_UNICODE_TOUPPER() loop into a buffer sized from the input is wrong
+    // because full case mapping is not 1:1:
+    //   - one character can expand to several ("ß" -> "SS", "ﬃ" -> "FFI"),
+    //   - the result can need a wider storage kind than the input ("ÿ" is
+    //     Latin-1 but "Ÿ" is U+0178), so writing it into a buffer allocated
+    //     with the input's max char would truncate the code point.
+    // str.upper is called unbound so that, like the fast path, a str
+    // subclass overriding upper() cannot change the result.
+    return PyObject_CallMethod((PyObject *)&PyUnicode_Type, "upper", "(O)", self);
+}
diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py
@@ -428,3 +428,21 @@
     c_function_name="CPyStr_Ord",
     error_kind=ERR_MAGIC,
 )
+
+# str.lower(): compiles s.lower() on a str receiver to a CPyStr_Lower(s) call
+method_op(
+    name="lower",
+    arg_types=[str_rprimitive],
+    return_type=str_rprimitive,
+    c_function_name="CPyStr_Lower",
+    error_kind=ERR_MAGIC,
+)
+
+# str.upper(): compiles s.upper() on a str receiver to a CPyStr_Upper(s) call
+method_op(
+    name="upper",
+    arg_types=[str_rprimitive],
+    return_type=str_rprimitive,
+    c_function_name="CPyStr_Upper",
+    error_kind=ERR_MAGIC,
+)
diff --git a/mypyc/test-data/fixtures/ir.py b/mypyc/test-data/fixtures/ir.py
@@ -112,7 +112,6 @@ def lstrip(self, item: Optional[str] = None) -> str: pass
     def rstrip(self, item: Optional[str] = None) -> str: pass
     def join(self, x: Iterable[str]) -> str: pass
     def format(self, *args: Any, **kwargs: Any) -> str: ...
-    def upper(self) -> str: ...
     def startswith(self, x: Union[str, Tuple[str, ...]], start: int=..., end: int=...) -> bool: ...
     def endswith(self, x: Union[str, Tuple[str, ...]], start: int=..., end: int=...) -> bool: ...
     def replace(self, old: str, new: str, maxcount: int=...) -> str: ...
@@ -122,6 +121,8 @@ def rpartition(self, sep: str, /) -> Tuple[str, str, str]: ...
     def removeprefix(self, prefix: str, /) -> str: ...
     def removesuffix(self, suffix: str, /) -> str: ...
     def islower(self) -> bool: ...
+    def lower(self) -> str: ...
+    def upper(self) -> str: ...
 
 class float:
     def __init__(self, x: object) -> None: pass

diff --git a/mypyc/test-data/irbuild-str.test b/mypyc/test-data/irbuild-str.test
@@ -562,3 +562,23 @@ L0:
     r3 = box(native_int, r1)
     r4 = unbox(int, r3)
     return r4
+
+[case testLower]
+def do_lower(s: str) -> str:
+    return s.lower()
+[out]
+def do_lower(s):
+    s, r0 :: str
+L0:
+    r0 = CPyStr_Lower(s)
+    return r0
+
+[case testUpper]
+def do_upper(s: str) -> str:
+    return s.upper()
+[out]
+def do_upper(s):
+    s, r0 :: str
+L0:
+    r0 = CPyStr_Upper(s)
+    return r0
diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test
@@ -906,3 +906,30 @@ def test_count_multi_start_end_emoji() -> None:
     assert string.count("😴😴😴", 0, 12) == 1, string.count("😴😴😴", 0, 12)
     assert string.count("🚀🚀🚀", 0, 12) == 2, string.count("🚀🚀🚀", 0, 12)
     assert string.count("ñññ", 0, 12) == 1, string.count("ñññ", 0, 12)
+
+[case testLower]
+def test_str_lower() -> None:
+    # Pairs of (input, expected str.lower() result).
+    cases = [
+        ("", ""), ("ABC", "abc"), ("abc", "abc"), ("AbC123", "abc123"),
+        ("áÉÍ", "áéí"), ("😴🚀", "😴🚀"),
+        ("SS", "ss"),
+        ("Σ", "σ"),  # Greek capital sigma -> small sigma
+    ]
+    for text, expected in cases:
+        assert text.lower() == expected
+    #assert "İ".lower() == "i̇"  # TODO: Latin capital letter I with dot above -> 'i' + combining dot
+    #assert len("İ".lower()) == 2  # TODO: Confirms length change
+
+[case testUpper]
+def test_str_upper() -> None:
+    # Pairs of (input, expected str.upper() result).
+    cases = [
+        ("", ""), ("abc", "ABC"), ("ABC", "ABC"), ("AbC123", "ABC123"),
+        ("áéí", "ÁÉÍ"), ("😴🚀", "😴🚀"),
+    ]
+    for text, expected in cases:
+        assert text.upper() == expected
+    #assert "ß".upper() == "SS"     # TODO: German sharp S -> double S
+    #assert "ﬃ".upper() == "FFI"    # TODO: Ligature 'ffi' -> separate letters
+    #assert len("ﬃ".upper()) == 3   # TODO: Confirm length increases