Skip to content

Commit 80edff1

Browse files
authored
Defer XML string cleaning to improve performance (#1511)
* perf: Attempt to parse XML strings without cleaning (which is expensive) before trying again with cleaning * refactor: Removed unused XML import * refactor: Make use of the new parseXMLString function in myplex.py * perf: Only encode strings to utf-8 once in the event a parse error occurs in parseXMLString * refactor: Updated the parseXMLString function to more closely follow the logic that previously existed * refactor: Use explicit imports and reorder imports according to the repo's style guidelines * fix: Bad exception handler
1 parent 9758159 commit 80edff1

File tree

5 files changed

+18
-13
lines changed

5 files changed

+18
-13
lines changed

plexapi/client.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,7 @@ def query(self, path, method=None, headers=None, timeout=None, **kwargs):
197197
raise NotFound(message)
198198
else:
199199
raise BadRequest(message)
200-
data = utils.cleanXMLString(response.text).encode('utf8')
201-
return ElementTree.fromstring(data) if data.strip() else None
200+
return utils.parseXMLString(response.text)
202201

203202
def sendCommand(self, command, proxy=None, **params):
204203
""" Convenience wrapper around :func:`~plexapi.client.PlexClient.query` to more easily

plexapi/media.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
2-
import xml
32
from pathlib import Path
43
from urllib.parse import quote_plus
4+
from xml.etree import ElementTree
55

66
from plexapi import log, settings, utils
77
from plexapi.base import PlexObject
@@ -1077,7 +1077,7 @@ def select(self):
10771077
data = f'{key}?url={quote_plus(self.ratingKey)}'
10781078
try:
10791079
self._server.query(data, method=self._server._session.put)
1080-
except xml.etree.ElementTree.ParseError:
1080+
except ElementTree.ParseError:
10811081
pass
10821082

10831083
@property

plexapi/myplex.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import threading
55
import time
66
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
7-
from xml.etree import ElementTree
87

98
import requests
109

@@ -250,8 +249,7 @@ def query(self, url, method=None, headers=None, timeout=None, **kwargs):
250249
return response.json()
251250
elif 'text/plain' in response.headers.get('Content-Type', ''):
252251
return response.text.strip()
253-
data = utils.cleanXMLString(response.text).encode('utf8')
254-
return ElementTree.fromstring(data) if data.strip() else None
252+
return utils.parseXMLString(response.text)
255253

256254
def ping(self):
257255
""" Ping the Plex.tv API.
@@ -1879,8 +1877,7 @@ def _query(self, url, method=None, headers=None, **kwargs):
18791877
codename = codes.get(response.status_code)[0]
18801878
errtext = response.text.replace('\n', ' ')
18811879
raise BadRequest(f'({response.status_code}) {codename} {response.url}; {errtext}')
1882-
data = response.text.encode('utf8')
1883-
return ElementTree.fromstring(data) if data.strip() else None
1880+
return utils.parseXMLString(response.text)
18841881

18851882

18861883
def _connect(cls, url, token, session, timeout, results, i, job_is_done_event=None):

plexapi/server.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33
from functools import cached_property
44
from urllib.parse import urlencode
5-
from xml.etree import ElementTree
65

76
import requests
87

@@ -768,8 +767,7 @@ def query(self, key, method=None, headers=None, params=None, timeout=None, **kwa
768767
raise NotFound(message)
769768
else:
770769
raise BadRequest(message)
771-
data = utils.cleanXMLString(response.text).encode('utf8')
772-
return ElementTree.fromstring(data) if data.strip() else None
770+
return utils.parseXMLString(response.text)
773771

774772
def search(self, query, mediatype=None, limit=None, sectionId=None):
775773
""" Returns a list of media items or filter categories from the resulting

plexapi/utils.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
from hashlib import sha1
1818
from threading import Event, Thread
1919
from urllib.parse import quote
20+
from xml.etree import ElementTree
2021

2122
import requests
2223
from requests.status_codes import _codes as codes
2324

2425
from plexapi.exceptions import BadRequest, NotFound, Unauthorized
25-
2626
try:
2727
from tqdm import tqdm
2828
except ImportError:
@@ -718,3 +718,14 @@ def sha1hash(guid):
718718

719719
def cleanXMLString(s):
720720
return _illegal_XML_re.sub('', s)
721+
722+
723+
def parseXMLString(s: str):
724+
""" Parse an XML string and return an ElementTree object. """
725+
if not s.strip():
726+
return None
727+
try: # Attempt to parse the string as-is without cleaning (which is expensive)
728+
return ElementTree.fromstring(s.encode('utf-8'))
729+
except ElementTree.ParseError: # If it fails, clean the string and try again
730+
cleaned_s = cleanXMLString(s).encode('utf-8')
731+
return ElementTree.fromstring(cleaned_s) if cleaned_s.strip() else None

0 commit comments

Comments
 (0)