@@ -265,6 +265,8 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
265
265
/* Forward declaration */
266
266
static inline int
267
267
_PyUnicodeWriter_WriteCharInline (_PyUnicodeWriter * writer , Py_UCS4 ch );
268
+ static inline void
269
+ _PyUnicodeWriter_InitWithBuffer (_PyUnicodeWriter * writer , PyObject * buffer );
268
270
static PyObject *
269
271
unicode_encode_utf8 (PyObject * unicode , _Py_error_handler error_handler ,
270
272
const char * errors );
@@ -4877,16 +4879,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
4877
4879
_Py_error_handler error_handler , const char * errors ,
4878
4880
Py_ssize_t * consumed )
4879
4881
{
4880
- _PyUnicodeWriter writer ;
4881
- const char * starts = s ;
4882
- const char * end = s + size ;
4883
-
4884
- Py_ssize_t startinpos ;
4885
- Py_ssize_t endinpos ;
4886
- const char * errmsg = "" ;
4887
- PyObject * error_handler_obj = NULL ;
4888
- PyObject * exc = NULL ;
4889
-
4890
4882
if (size == 0 ) {
4891
4883
if (consumed )
4892
4884
* consumed = 0 ;
@@ -4900,13 +4892,29 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
4900
4892
return get_latin1_char ((unsigned char )s [0 ]);
4901
4893
}
4902
4894
4903
- _PyUnicodeWriter_Init (& writer );
4904
- writer .min_length = size ;
4905
- if (_PyUnicodeWriter_Prepare (& writer , writer .min_length , 127 ) == -1 )
4906
- goto onError ;
4895
+ const char * starts = s ;
4896
+ const char * end = s + size ;
4897
+
4898
+ // fast path: try ASCII string.
4899
+ PyObject * u = PyUnicode_New (size , 127 );
4900
+ if (u == NULL ) {
4901
+ return NULL ;
4902
+ }
4903
+ s += ascii_decode (s , end , PyUnicode_DATA (u ));
4904
+ if (s == end ) {
4905
+ return u ;
4906
+ }
4907
+
4908
+ // Fall back to _PyUnicodeWriter once the ASCII fast path has failed.
4909
+ _PyUnicodeWriter writer ;
4910
+ _PyUnicodeWriter_InitWithBuffer (& writer , u );
4911
+ writer .pos = s - starts ;
4912
+
4913
+ Py_ssize_t startinpos , endinpos ;
4914
+ const char * errmsg = "" ;
4915
+ PyObject * error_handler_obj = NULL ;
4916
+ PyObject * exc = NULL ;
4907
4917
4908
- writer .pos = ascii_decode (s , end , writer .data );
4909
- s += writer .pos ;
4910
4918
while (s < end ) {
4911
4919
Py_UCS4 ch ;
4912
4920
int kind = writer .kind ;
@@ -6451,7 +6459,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
6451
6459
length after conversion to the true value. (But decoding error
6452
6460
handler might have to resize the string) */
6453
6461
_PyUnicodeWriter_Init (& writer );
6454
- writer .min_length = size ;
6462
+ writer .min_length = size ;
6455
6463
if (_PyUnicodeWriter_Prepare (& writer , size , 127 ) < 0 ) {
6456
6464
goto onError ;
6457
6465
}
@@ -6975,13 +6983,7 @@ PyUnicode_DecodeASCII(const char *s,
6975
6983
const char * errors )
6976
6984
{
6977
6985
const char * starts = s ;
6978
- _PyUnicodeWriter writer ;
6979
- int kind ;
6980
- void * data ;
6981
- Py_ssize_t startinpos ;
6982
- Py_ssize_t endinpos ;
6983
- Py_ssize_t outpos ;
6984
- const char * e ;
6986
+ const char * e = s + size ;
6985
6987
PyObject * error_handler_obj = NULL ;
6986
6988
PyObject * exc = NULL ;
6987
6989
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN ;
@@ -6993,20 +6995,25 @@ PyUnicode_DecodeASCII(const char *s,
6993
6995
if (size == 1 && (unsigned char )s [0 ] < 128 )
6994
6996
return get_latin1_char ((unsigned char )s [0 ]);
6995
6997
6996
- _PyUnicodeWriter_Init ( & writer );
6997
- writer . min_length = size ;
6998
- if (_PyUnicodeWriter_Prepare ( & writer , writer . min_length , 127 ) < 0 )
6998
+ // Shortcut for simple case
6999
+ PyObject * u = PyUnicode_New ( size , 127 ) ;
7000
+ if (u == NULL ) {
6999
7001
return NULL ;
7002
+ }
7003
+ Py_ssize_t outpos = ascii_decode (s , e , PyUnicode_DATA (u ));
7004
+ if (outpos == size ) {
7005
+ return u ;
7006
+ }
7000
7007
7001
- e = s + size ;
7002
- data = writer .data ;
7003
- outpos = ascii_decode (s , e , (Py_UCS1 * )data );
7008
+ _PyUnicodeWriter writer ;
7009
+ _PyUnicodeWriter_InitWithBuffer (& writer , u );
7004
7010
writer .pos = outpos ;
7005
- if (writer .pos == size )
7006
- return _PyUnicodeWriter_Finish (& writer );
7007
7011
7008
- s += writer .pos ;
7009
- kind = writer .kind ;
7012
+ s += outpos ;
7013
+ int kind = writer .kind ;
7014
+ void * data = writer .data ;
7015
+ Py_ssize_t startinpos , endinpos ;
7016
+
7010
7017
while (s < e ) {
7011
7018
unsigned char c = (unsigned char )* s ;
7012
7019
if (c < 128 ) {
@@ -13506,6 +13513,16 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
13506
13513
assert (writer -> kind <= PyUnicode_1BYTE_KIND );
13507
13514
}
13508
13515
13516
+ // Initialize _PyUnicodeWriter with an already-created buffer object as its backing store. All other writer state is reset, so callers that have already filled part of the buffer must set writer->pos themselves afterwards.
13517
+ static inline void
13518
+ _PyUnicodeWriter_InitWithBuffer (_PyUnicodeWriter * writer , PyObject * buffer )
13519
+ {
13520
+ memset (writer , 0 , sizeof (* writer )); // start from an all-zero writer state (pos, flags, etc. are cleared)
13521
+ writer -> buffer = buffer ; // NOTE(review): presumably the writer takes over the caller's reference to buffer -- confirm
13522
+ _PyUnicodeWriter_Update (writer ); // presumably refreshes the writer's cached fields (e.g. size) from buffer -- confirm against its definition
13523
+ writer -> min_length = writer -> size ; // use the size the writer reports for the supplied buffer as its minimum length
13524
+ }
13525
+
13509
13526
int
13510
13527
_PyUnicodeWriter_PrepareInternal (_PyUnicodeWriter * writer ,
13511
13528
Py_ssize_t length , Py_UCS4 maxchar )
0 commit comments