Skip to content

Defer XML string cleaning to improve performance #1511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
3 changes: 1 addition & 2 deletions plexapi/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,7 @@ def query(self, path, method=None, headers=None, timeout=None, **kwargs):
raise NotFound(message)
else:
raise BadRequest(message)
data = utils.cleanXMLString(response.text).encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
return utils.parseXMLString(response.text)

def sendCommand(self, command, proxy=None, **params):
""" Convenience wrapper around :func:`~plexapi.client.PlexClient.query` to more easily
Expand Down
4 changes: 2 additions & 2 deletions plexapi/media.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import xml
from pathlib import Path
from urllib.parse import quote_plus
from xml.etree import ElementTree

from plexapi import log, settings, utils
from plexapi.base import PlexObject
Expand Down Expand Up @@ -1075,7 +1075,7 @@ def select(self):
data = f'{key}?url={quote_plus(self.ratingKey)}'
try:
self._server.query(data, method=self._server._session.put)
except xml.etree.ElementTree.ParseError:
except ElementTree.ParseError:
pass

@property
Expand Down
7 changes: 2 additions & 5 deletions plexapi/myplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import threading
import time
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
from xml.etree import ElementTree

import requests

Expand Down Expand Up @@ -250,8 +249,7 @@ def query(self, url, method=None, headers=None, timeout=None, **kwargs):
return response.json()
elif 'text/plain' in response.headers.get('Content-Type', ''):
return response.text.strip()
data = utils.cleanXMLString(response.text).encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
return utils.parseXMLString(response.text)

def ping(self):
""" Ping the Plex.tv API.
Expand Down Expand Up @@ -1879,8 +1877,7 @@ def _query(self, url, method=None, headers=None, **kwargs):
codename = codes.get(response.status_code)[0]
errtext = response.text.replace('\n', ' ')
raise BadRequest(f'({response.status_code}) {codename} {response.url}; {errtext}')
data = response.text.encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
return utils.parseXMLString(response.text)


def _connect(cls, url, token, session, timeout, results, i, job_is_done_event=None):
Expand Down
4 changes: 1 addition & 3 deletions plexapi/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os
from functools import cached_property
from urllib.parse import urlencode
from xml.etree import ElementTree

import requests

Expand Down Expand Up @@ -768,8 +767,7 @@ def query(self, key, method=None, headers=None, params=None, timeout=None, **kwa
raise NotFound(message)
else:
raise BadRequest(message)
data = utils.cleanXMLString(response.text).encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
return utils.parseXMLString(response.text)

def search(self, query, mediatype=None, limit=None, sectionId=None):
""" Returns a list of media items or filter categories from the resulting
Expand Down
13 changes: 12 additions & 1 deletion plexapi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
from hashlib import sha1
from threading import Event, Thread
from urllib.parse import quote
from xml.etree import ElementTree

import requests
from requests.status_codes import _codes as codes

from plexapi.exceptions import BadRequest, NotFound, Unauthorized

try:
from tqdm import tqdm
except ImportError:
Expand Down Expand Up @@ -718,3 +718,14 @@ def sha1hash(guid):

def cleanXMLString(s):
    """ Return ``s`` with every match of the module-level ``_illegal_XML_re``
        pattern removed.

        NOTE(review): the pattern is defined elsewhere in this module; judging by
        its name it matches characters that are not legal in XML -- confirm
        against its definition.
    """
    cleaned = _illegal_XML_re.sub('', s)
    return cleaned


def parseXMLString(s: str):
    """ Parse an XML string and return its root element.

        The string is first parsed as-is, because cleaning is expensive. Only if
        that attempt raises ``ElementTree.ParseError`` is the string passed
        through :func:`cleanXMLString` and parsed a second time.

        Parameters:
            s (str): The XML text to parse.

        Returns:
            The root element produced by ``ElementTree.fromstring``, or ``None``
            when ``s`` (or its cleaned form) is empty or whitespace only.

        Raises:
            ElementTree.ParseError: If the cleaned string still fails to parse.
    """
    if not s.strip():
        return None
    try:
        # Fast path: most input parses without any cleaning.
        root = ElementTree.fromstring(s.encode('utf-8'))
    except ElementTree.ParseError:
        # Slow path: clean the string and retry once.
        sanitized = cleanXMLString(s).encode('utf-8')
        if not sanitized.strip():
            return None
        root = ElementTree.fromstring(sanitized)
    return root
Loading