From f87a81f69751ac3fed5ae1a245f7ee5e1a077961 Mon Sep 17 00:00:00 2001 From: Elias Benbourenane Date: Fri, 4 Apr 2025 16:11:20 +0000 Subject: [PATCH 1/7] perf: Attempt to parse XML strings without cleaning (which is expensive) before trying again with cleaning --- plexapi/client.py | 3 +-- plexapi/myplex.py | 3 +-- plexapi/server.py | 3 +-- plexapi/utils.py | 12 ++++++++++++ 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/plexapi/client.py b/plexapi/client.py index 3d89e3dc6..5436a2a97 100644 --- a/plexapi/client.py +++ b/plexapi/client.py @@ -197,8 +197,7 @@ def query(self, path, method=None, headers=None, timeout=None, **kwargs): raise NotFound(message) else: raise BadRequest(message) - data = utils.cleanXMLString(response.text).encode('utf8') - return ElementTree.fromstring(data) if data.strip() else None + return utils.parseXMLString(response.text) def sendCommand(self, command, proxy=None, **params): """ Convenience wrapper around :func:`~plexapi.client.PlexClient.query` to more easily diff --git a/plexapi/myplex.py b/plexapi/myplex.py index 448a2649a..d4e4899e1 100644 --- a/plexapi/myplex.py +++ b/plexapi/myplex.py @@ -250,8 +250,7 @@ def query(self, url, method=None, headers=None, timeout=None, **kwargs): return response.json() elif 'text/plain' in response.headers.get('Content-Type', ''): return response.text.strip() - data = utils.cleanXMLString(response.text).encode('utf8') - return ElementTree.fromstring(data) if data.strip() else None + return utils.parseXMLString(response.text) def ping(self): """ Ping the Plex.tv API. 
diff --git a/plexapi/server.py b/plexapi/server.py index 8cd110d80..e0318325e 100644 --- a/plexapi/server.py +++ b/plexapi/server.py @@ -768,8 +768,7 @@ def query(self, key, method=None, headers=None, params=None, timeout=None, **kwa raise NotFound(message) else: raise BadRequest(message) - data = utils.cleanXMLString(response.text).encode('utf8') - return ElementTree.fromstring(data) if data.strip() else None + return utils.parseXMLString(response.text) def search(self, query, mediatype=None, limit=None, sectionId=None): """ Returns a list of media items or filter categories from the resulting diff --git a/plexapi/utils.py b/plexapi/utils.py index dd1cfc9ce..a1b4a45c3 100644 --- a/plexapi/utils.py +++ b/plexapi/utils.py @@ -23,6 +23,8 @@ from plexapi.exceptions import BadRequest, NotFound, Unauthorized +from xml.etree import ElementTree + try: from tqdm import tqdm except ImportError: @@ -718,3 +720,13 @@ def sha1hash(guid): def cleanXMLString(s): return _illegal_XML_re.sub('', s) + + +def parseXMLString(s: str): + """ Parse an XML string and return an ElementTree object. 
""" + if not s.strip(): + return None + try: # Attempt to parse the string as-is without cleaning (which is expensive) + return ElementTree.fromstring(s.encode('utf-8')) + except ElementTree.ParseError: # If it fails, clean the string and try again + return ElementTree.fromstring(cleanXMLString(s).encode('utf-8')) From 938b90521b967cafa49b5392564feb565870ef70 Mon Sep 17 00:00:00 2001 From: Elias Benbourenane Date: Fri, 4 Apr 2025 16:33:48 +0000 Subject: [PATCH 2/7] refactor: Removed unused XML import --- plexapi/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/plexapi/server.py b/plexapi/server.py index e0318325e..cee77fe66 100644 --- a/plexapi/server.py +++ b/plexapi/server.py @@ -2,7 +2,6 @@ import os from functools import cached_property from urllib.parse import urlencode -from xml.etree import ElementTree import requests From 706461d6d10d7d851c0841fdcd8bb73b8d7d1115 Mon Sep 17 00:00:00 2001 From: Elias Benbourenane Date: Fri, 4 Apr 2025 17:13:27 +0000 Subject: [PATCH 3/7] refactor: Make use of the new parseXMLString function in myplex.py --- plexapi/myplex.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/plexapi/myplex.py b/plexapi/myplex.py index d4e4899e1..c76993539 100644 --- a/plexapi/myplex.py +++ b/plexapi/myplex.py @@ -4,7 +4,6 @@ import threading import time from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit -from xml.etree import ElementTree import requests @@ -1878,8 +1877,7 @@ def _query(self, url, method=None, headers=None, **kwargs): codename = codes.get(response.status_code)[0] errtext = response.text.replace('\n', ' ') raise BadRequest(f'({response.status_code}) {codename} {response.url}; {errtext}') - data = response.text.encode('utf8') - return ElementTree.fromstring(data) if data.strip() else None + return utils.parseXMLString(response.text) def _connect(cls, url, token, session, timeout, results, i, job_is_done_event=None): From f76ec223b8f8836af9e49cad2b27c575c1de5036 Mon Sep 17 00:00:00 2001 
From: Elias Benbourenane Date: Fri, 4 Apr 2025 17:16:10 +0000 Subject: [PATCH 4/7] perf: Only encode strings to utf-8 once in the event a parse error occurs in parseXMLString --- plexapi/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plexapi/utils.py b/plexapi/utils.py index a1b4a45c3..a5825bb74 100644 --- a/plexapi/utils.py +++ b/plexapi/utils.py @@ -726,7 +726,8 @@ def parseXMLString(s: str): """ Parse an XML string and return an ElementTree object. """ if not s.strip(): return None + encoded_s = s.encode('utf-8') try: # Attempt to parse the string as-is without cleaning (which is expensive) - return ElementTree.fromstring(s.encode('utf-8')) + return ElementTree.fromstring(encoded_s) except ElementTree.ParseError: # If it fails, clean the string and try again - return ElementTree.fromstring(cleanXMLString(s).encode('utf-8')) + return ElementTree.fromstring(cleanXMLString(encoded_s)) From ffefe07dcc1870b30a92bdccec7e481fc6786f83 Mon Sep 17 00:00:00 2001 From: Elias Benbourenane Date: Fri, 4 Apr 2025 17:35:10 +0000 Subject: [PATCH 5/7] refactor: Updated the parseXMLString function to more closely follow the logic that previously existed --- plexapi/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plexapi/utils.py b/plexapi/utils.py index a5825bb74..d727675c1 100644 --- a/plexapi/utils.py +++ b/plexapi/utils.py @@ -726,8 +726,8 @@ def parseXMLString(s: str): """ Parse an XML string and return an ElementTree object. 
""" if not s.strip(): return None - encoded_s = s.encode('utf-8') try: # Attempt to parse the string as-is without cleaning (which is expensive) - return ElementTree.fromstring(encoded_s) + return ElementTree.fromstring(s.encode('utf-8')) except ElementTree.ParseError: # If it fails, clean the string and try again - return ElementTree.fromstring(cleanXMLString(encoded_s)) + cleaned_s = cleanXMLString(s).encode('utf-8') + return ElementTree.fromstring(cleaned_s) if cleaned_s.strip() else None From 8894883cda07d2d72410f40fbde64e43937c69ed Mon Sep 17 00:00:00 2001 From: Elias Benbourenane Date: Mon, 7 Apr 2025 00:29:54 +0000 Subject: [PATCH 6/7] refactor: Use explicit imports and reorder imports according to the repo's style guidelines --- plexapi/media.py | 2 +- plexapi/utils.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/plexapi/media.py b/plexapi/media.py index 9c6e3115b..676c64c3c 100644 --- a/plexapi/media.py +++ b/plexapi/media.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -import xml from pathlib import Path from urllib.parse import quote_plus +from xml.etree import ElementTree from plexapi import log, settings, utils from plexapi.base import PlexObject diff --git a/plexapi/utils.py b/plexapi/utils.py index d727675c1..bbff6a8e0 100644 --- a/plexapi/utils.py +++ b/plexapi/utils.py @@ -17,14 +17,12 @@ from hashlib import sha1 from threading import Event, Thread from urllib.parse import quote +from xml.etree import ElementTree import requests from requests.status_codes import _codes as codes from plexapi.exceptions import BadRequest, NotFound, Unauthorized - -from xml.etree import ElementTree - try: from tqdm import tqdm except ImportError: From 1443299fc0fdd9b8b049d83e452ae52615346217 Mon Sep 17 00:00:00 2001 From: Elias Benbourenane Date: Mon, 7 Apr 2025 17:34:29 +0000 Subject: [PATCH 7/7] fix: Bad exception handler --- plexapi/media.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plexapi/media.py b/plexapi/media.py 
index 676c64c3c..0f7f1d7ad 100644 --- a/plexapi/media.py +++ b/plexapi/media.py @@ -1075,7 +1075,7 @@ def select(self): data = f'{key}?url={quote_plus(self.ratingKey)}' try: self._server.query(data, method=self._server._session.put) - except xml.etree.ElementTree.ParseError: + except ElementTree.ParseError: pass @property