File tree 3 files changed +31
-4
lines changed
Misc/NEWS.d/next/Core and Builtins
3 files changed +31
-4
lines changed Original file line number Diff line number Diff line change @@ -720,6 +720,13 @@ def test_isidentifier(self):
720
720
self .assertFalse ("©" .isidentifier ())
721
721
self .assertFalse ("0" .isidentifier ())
722
722
723
+ @support .cpython_only
724
+ def test_isidentifier_legacy (self ):
725
+ import _testcapi
726
+ u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
727
+ self .assertTrue (u .isidentifier ())
728
+ self .assertTrue (_testcapi .unicode_legacy_string (u ).isidentifier ())
729
+
723
730
def test_isprintable (self ):
724
731
self .assertTrue ("" .isprintable ())
725
732
self .assertTrue (" " .isprintable ())
Original file line number Diff line number Diff line change
1
+ Fixed :meth:`str.isidentifier` for non-canonicalized strings containing
2
+ non-BMP characters on Windows.
Original file line number Diff line number Diff line change @@ -12356,20 +12356,38 @@ PyUnicode_IsIdentifier(PyObject *self)
12356
12356
return len && i == len ;
12357
12357
}
12358
12358
else {
12359
- Py_ssize_t i , len = PyUnicode_GET_SIZE (self );
12359
+ Py_ssize_t i = 0 , len = PyUnicode_GET_SIZE (self );
12360
12360
if (len == 0 ) {
12361
12361
/* an empty string is not a valid identifier */
12362
12362
return 0 ;
12363
12363
}
12364
12364
12365
12365
const wchar_t * wstr = _PyUnicode_WSTR (self );
12366
- Py_UCS4 ch = wstr [0 ];
12366
+ Py_UCS4 ch = wstr [i ++ ];
12367
+ #if SIZEOF_WCHAR_T == 2
12368
+ if (Py_UNICODE_IS_HIGH_SURROGATE (ch )
12369
+ && i < len
12370
+ && Py_UNICODE_IS_LOW_SURROGATE (wstr [i ]))
12371
+ {
12372
+ ch = Py_UNICODE_JOIN_SURROGATES (ch , wstr [i ]);
12373
+ i ++ ;
12374
+ }
12375
+ #endif
12367
12376
if (!_PyUnicode_IsXidStart (ch ) && ch != 0x5F /* LOW LINE */ ) {
12368
12377
return 0 ;
12369
12378
}
12370
12379
12371
- for (i = 1 ; i < len ; i ++ ) {
12372
- ch = wstr [i ];
12380
+ while (i < len ) {
12381
+ ch = wstr [i ++ ];
12382
+ #if SIZEOF_WCHAR_T == 2
12383
+ if (Py_UNICODE_IS_HIGH_SURROGATE (ch )
12384
+ && i < len
12385
+ && Py_UNICODE_IS_LOW_SURROGATE (wstr [i ]))
12386
+ {
12387
+ ch = Py_UNICODE_JOIN_SURROGATES (ch , wstr [i ]);
12388
+ i ++ ;
12389
+ }
12390
+ #endif
12373
12391
if (!_PyUnicode_IsXidContinue (ch )) {
12374
12392
return 0 ;
12375
12393
}
You can’t perform that action at this time.
0 commit comments