Skip to content

Commit e23cd8d

Browse files
authored
Sanitize XML strings before parsing (#1452)
1 parent bbe3e8e commit e23cd8d

File tree

4 files changed

+46 additions, -3 deletions

plexapi/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def query(self, path, method=None, headers=None, timeout=None, **kwargs):
197197
raise NotFound(message)
198198
else:
199199
raise BadRequest(message)
200-
data = response.text.encode('utf8')
200+
data = utils.cleanXMLString(response.text).encode('utf8')
201201
return ElementTree.fromstring(data) if data.strip() else None
202202

203203
def sendCommand(self, command, proxy=None, **params):

plexapi/myplex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def query(self, url, method=None, headers=None, timeout=None, **kwargs):
250250
return response.json()
251251
elif 'text/plain' in response.headers.get('Content-Type', ''):
252252
return response.text.strip()
253-
data = response.text.encode('utf8')
253+
data = utils.cleanXMLString(response.text).encode('utf8')
254254
return ElementTree.fromstring(data) if data.strip() else None
255255

256256
def ping(self):

plexapi/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ def query(self, key, method=None, headers=None, params=None, timeout=None, **kwa
768768
raise NotFound(message)
769769
else:
770770
raise BadRequest(message)
771-
data = response.text.encode('utf8')
771+
data = utils.cleanXMLString(response.text).encode('utf8')
772772
return ElementTree.fromstring(data) if data.strip() else None
773773

774774
def search(self, query, mediatype=None, limit=None, sectionId=None):

plexapi/utils.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import re
88
import string
9+
import sys
910
import time
1011
import unicodedata
1112
import warnings
@@ -673,3 +674,45 @@ def openOrRead(file):
673674
def sha1hash(guid):
    """ Return the SHA1 hash of a guid.

        Parameters:
            guid (str): The guid to hash. It is encoded as UTF-8 before hashing.

        Returns:
            str: The hexadecimal digest of the hash.
    """
    return sha1(guid.encode('utf-8')).hexdigest()
677+
678+
679+
# https://stackoverflow.com/a/64570125
# Inclusive (low, high) code point ranges that cleanXMLString() removes.
# Tab (0x09), newline (0x0A), carriage return (0x0D) and 0x85 are deliberately
# left out of the ranges below, so they are kept.
_illegal_XML_characters = [
    (0x00, 0x08),
    (0x0B, 0x0C),
    (0x0E, 0x1F),
    (0x7F, 0x84),
    (0x86, 0x9F),
    (0xFDD0, 0xFDDF),
    (0xFFFE, 0xFFFF),
]
if sys.maxunicode >= 0x10000:  # not narrow build
    # The last two code points (xFFFE and xFFFF) of every plane from 1 to 16,
    # i.e. (0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), ..., (0x10FFFE, 0x10FFFF).
    _illegal_XML_characters.extend(
        ((plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF)
        for plane in range(0x01, 0x11)
    )
# Each range becomes a "low-high" span inside a single regex character class.
_illegal_XML_ranges = [
    fr'{chr(low)}-{chr(high)}'
    for (low, high) in _illegal_XML_characters
]
_illegal_XML_re = re.compile(fr'[{"".join(_illegal_XML_ranges)}]')


def cleanXMLString(s):
    """ Return the string with every character in the illegal XML ranges removed.

        Parameters:
            s (str): The XML text to clean.

        Returns:
            str: A copy of ``s`` without the matched characters. All other
            characters are kept unchanged and in their original order.
    """
    return _illegal_XML_re.sub('', s)

0 commit comments

Comments
 (0)