diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 5fe9d6884106ad..07c97e5cb566d4 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2960,6 +2960,24 @@ def element_factory(x, y): del b gc_collect() + def test_deepcopy(self): + # Prevent crashes when __deepcopy__() mutates the element being copied. + # See https://github.com/python/cpython/issues/133009. + class X(ET.Element): + def __deepcopy__(self, memo): + root.clear() + return self + + root = ET.Element('a') + evil = X('x') + root.extend([evil, ET.Element('y')]) + if is_python_implementation(): + self.assertRaises(RuntimeError, copy.deepcopy, root) + else: + c = copy.deepcopy(root) + # In the C implementation, we can still copy the evil element. + self.assertListEqual(list(c), [evil]) + class MutationDeleteElementPath(str): def __new__(cls, elem, *args): diff --git a/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst b/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst new file mode 100644 index 00000000000000..1f7155c6a40d00 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst @@ -0,0 +1,3 @@ +:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.__deepcopy__ +` when the element is concurrently mutated. +Patch by Bénédikt Tran. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 24b567b6caa260..91cf66ebf007fb 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -805,6 +805,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) { Py_ssize_t i; ElementObject* element; + PyObject* tmp; PyObject* tag; PyObject* attrib; PyObject* text; @@ -813,12 +814,16 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) PyTypeObject *tp = Py_TYPE(self); elementtreestate *st = get_elementtree_state_by_type(tp); - tag = deepcopy(st, self->tag, memo); + tmp = Py_NewRef(self->tag); + tag = deepcopy(st, tmp, memo); + Py_CLEAR(tmp); if (!tag) return NULL; if (self->extra && self->extra->attrib) { - attrib = deepcopy(st, self->extra->attrib, memo); + tmp = Py_NewRef(self->extra->attrib); + attrib = deepcopy(st, tmp, memo); + Py_CLEAR(tmp); if (!attrib) { Py_DECREF(tag); return NULL; @@ -835,12 +840,16 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (!element) return NULL; - text = deepcopy(st, JOIN_OBJ(self->text), memo); + tmp = Py_NewRef(JOIN_OBJ(self->text)); + text = deepcopy(st, tmp, memo); + Py_CLEAR(tmp); if (!text) goto error; _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); - tail = deepcopy(st, JOIN_OBJ(self->tail), memo); + tmp = Py_NewRef(JOIN_OBJ(self->tail)); + tail = deepcopy(st, tmp, memo); + Py_CLEAR(tmp); if (!tail) goto error; _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); @@ -850,9 +859,10 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (element_resize(element, self->extra->length) < 0) goto error; - // TODO(picnixz): check for an evil child's __deepcopy__ on 'self' - for (i = 0; i < self->extra->length; i++) { - PyObject* child = deepcopy(st, self->extra->children[i], memo); + for (i = 0; self->extra && i < self->extra->length; i++) { + tmp = Py_NewRef(self->extra->children[i]); + PyObject* child = deepcopy(st, tmp, memo); + Py_CLEAR(tmp); if (!child || !Element_Check(st, child)) { if (child) { raise_type_error(child); @@ -865,7 +875,12 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) } assert(!element->extra->length); - element->extra->length = self->extra->length; + /* + * The original 'self->extra' may be gone at this point if deepcopy() + * had side-effects. However, 'i' is the number of copied items that + * we were able to successfully copy. + */ + element->extra->length = i; } /* add object to memo dictionary (so deepcopy won't visit it again) */