@@ -600,6 +600,9 @@ def urldefrag(url):
600
600
601
601
def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    Public wrapper around _unquote_to_bytearray(): the helper does the
    actual %xx decoding, and its result is passed through bytes() so the
    value handed back to callers is a bytes object.
    """
    return bytes(_unquote_to_bytearray(string))
604
+
605
+ def _unquote_to_bytearray (string ):
603
606
# Note: strings are encoded as UTF-8. This is only an issue if it contains
604
607
# unescaped non-ASCII characters, which URIs should not.
605
608
if not string :
@@ -611,8 +614,8 @@ def unquote_to_bytes(string):
611
614
bits = string .split (b'%' )
612
615
if len (bits ) == 1 :
613
616
return string
614
- res = [ bits [0 ]]
615
- append = res .append
617
+ res = bytearray ( bits [0 ])
618
+ add_data = res .extend
616
619
# Delay the initialization of the table to not waste memory
617
620
# if the function is never called
618
621
global _hextobyte
@@ -621,15 +624,25 @@ def unquote_to_bytes(string):
621
624
for a in _hexdig for b in _hexdig }
622
625
for item in bits [1 :]:
623
626
try :
624
- append (_hextobyte [item [:2 ]])
625
- append (item [2 :])
627
+ add_data (_hextobyte [item [:2 ]])
628
+ add_data (item [2 :])
626
629
except KeyError :
627
- append (b'%' )
628
- append (item )
629
- return b'' . join ( res )
630
+ add_data (b'%' )
631
+ add_data (item )
632
+ return res
630
633
631
634
_asciire = re .compile ('([\x00 -\x7f ]+)' )
632
635
636
def _generate_unquoted_parts(string, encoding, errors):
    """Yield the pieces of *string* with its ASCII runs percent-decoded.

    The string is scanned with the module-level _asciire pattern.  For each
    match two pieces are yielded, in order:

    * the text between the previous match and this one, unchanged;
    * the matched run, passed through _unquote_to_bytearray() and decoded
      with the given *encoding* and *errors*.

    After the last match the remaining tail of *string* is yielded
    unchanged.  Pieces may be empty strings; the caller is expected to
    concatenate them (e.g. with ''.join()).
    """
    previous_match_end = 0
    for ascii_match in _asciire.finditer(string):
        start, end = ascii_match.span()
        yield string[previous_match_end:start]  # Non-ASCII
        # The ascii_match[1] group == string[start:end].
        yield _unquote_to_bytearray(ascii_match[1]).decode(encoding, errors)
        previous_match_end = end
    yield string[previous_match_end:]  # Non-ASCII tail
645
+
633
646
def unquote (string , encoding = 'utf-8' , errors = 'replace' ):
634
647
"""Replace %xx escapes by their single-character equivalent. The optional
635
648
encoding and errors parameters specify how to decode percent-encoded
@@ -641,22 +654,16 @@ def unquote(string, encoding='utf-8', errors='replace'):
641
654
unquote('abc%20def') -> 'abc def'.
642
655
"""
643
656
if isinstance (string , bytes ):
644
- return unquote_to_bytes (string ).decode (encoding , errors )
657
+ return _unquote_to_bytearray (string ).decode (encoding , errors )
645
658
if '%' not in string :
659
+ # Is it a string-like object?
646
660
string .split
647
661
return string
648
662
if encoding is None :
649
663
encoding = 'utf-8'
650
664
if errors is None :
651
665
errors = 'replace'
652
- bits = _asciire .split (string )
653
- res = [bits [0 ]]
654
- append = res .append
655
- for i in range (1 , len (bits ), 2 ):
656
- append (unquote_to_bytes (bits [i ]).decode (encoding , errors ))
657
- append (bits [i + 1 ])
658
- return '' .join (res )
659
-
666
+ return '' .join (_generate_unquoted_parts (string , encoding , errors ))
660
667
661
668
def parse_qs (qs , keep_blank_values = False , strict_parsing = False ,
662
669
encoding = 'utf-8' , errors = 'replace' , max_num_fields = None , separator = '&' ):
0 commit comments