Skip to content

Commit 10517d7

Browse files
committed
Remove wstr and wstr_length
1 parent 718df6d commit 10517d7

File tree

5 files changed

+60
-600
lines changed

5 files changed

+60
-600
lines changed

Include/cpython/unicodeobject.h

Lines changed: 7 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363

6464
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
6565
structure. state.ascii and state.compact are set, and the data
66-
immediately follow the structure. utf8_length and wstr_length can be found
66+
immediately follow the structure. utf8_length can be found
6767
in the length field; the utf8 pointer is equal to the data pointer. */
6868
typedef struct {
6969
/* There are 4 forms of Unicode strings:
@@ -76,7 +76,7 @@ typedef struct {
7676
* compact = 1
7777
* ascii = 1
7878
* ready = 1
79-
* (length is the length of the utf8 and wstr strings)
79+
* (length is the length of the utf8 string)
8080
* (data starts just after the structure)
8181
* (since ASCII is decoded from UTF-8, the utf8 string is the data)
8282
@@ -91,51 +91,25 @@ typedef struct {
9191
* ascii = 0
9292
* utf8 is not shared with data
9393
* utf8_length = 0 if utf8 is NULL
94-
* wstr is shared with data and wstr_length=length
95-
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
96-
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
97-
* wstr_length = 0 if wstr is NULL
9894
* (data starts just after the structure)
9995
100-
- legacy string, not ready:
101-
102-
* structure = PyUnicodeObject
103-
* test: kind == PyUnicode_WCHAR_KIND
104-
* length = 0 (use wstr_length)
105-
* hash = -1
106-
* kind = PyUnicode_WCHAR_KIND
107-
* compact = 0
108-
* ascii = 0
109-
* ready = 0
110-
* interned = SSTATE_NOT_INTERNED
111-
* wstr is not NULL
112-
* data.any is NULL
113-
* utf8 is NULL
114-
* utf8_length = 0
115-
11696
- legacy string, ready:
11797
11898
* structure = PyUnicodeObject structure
119-
* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
99+
* test: !PyUnicode_IS_COMPACT(op)
120100
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
121101
PyUnicode_4BYTE_KIND
122102
* compact = 0
123103
* ready = 1
124104
* data.any is not NULL
125105
* utf8 is shared and utf8_length = length with data.any if ascii = 1
126106
* utf8_length = 0 if utf8 is NULL
127-
* wstr is shared with data.any and wstr_length = length
128-
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
129-
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
130-
* wstr_length = 0 if wstr is NULL
131107
132108
Compact strings use only one memory block (structure + characters),
133109
whereas legacy strings use one block for the structure and one block
134110
for characters.
135111
136-
Legacy strings are created by PyUnicode_FromUnicode() and
137-
PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
138-
when PyUnicode_READY() is called.
112+
Legacy strings are created by subclasses of Unicode.
139113
140114
See also _PyUnicode_CheckConsistency().
141115
*/
@@ -154,11 +128,6 @@ typedef struct {
154128
unsigned int interned:2;
155129
/* Character size:
156130
157-
- PyUnicode_WCHAR_KIND (0):
158-
159-
* character type = wchar_t (16 or 32 bits, depending on the
160-
platform)
161-
162131
- PyUnicode_1BYTE_KIND (1):
163132
164133
* character type = Py_UCS1 (8 bits, unsigned)
@@ -198,7 +167,6 @@ typedef struct {
198167
4 bytes (see issue #19537 on m68k). */
199168
unsigned int :24;
200169
} state;
201-
wchar_t *wstr; /* wchar_t representation (null-terminated) */
202170
} PyASCIIObject;
203171

204172
/* Non-ASCII strings allocated through PyUnicode_New use the
@@ -209,13 +177,9 @@ typedef struct {
209177
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
210178
* terminating \0. */
211179
char *utf8; /* UTF-8 representation (null-terminated) */
212-
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
213-
* surrogates count as two code points. */
214180
} PyCompactUnicodeObject;
215181

216-
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
217-
PyUnicodeObject structure. The actual string data is initially in the wstr
218-
block, and copied into the data block using _PyUnicode_Ready. */
182+
/* Object format for Unicode subclasses. */
219183
typedef struct {
220184
PyCompactUnicodeObject _base;
221185
union {
@@ -298,10 +262,6 @@ static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
298262
#endif
299263

300264
enum PyUnicode_Kind {
301-
/* String contains only wstr byte characters. This is only possible
302-
when the string was created with a legacy API and _PyUnicode_Ready()
303-
has not been called yet. */
304-
PyUnicode_WCHAR_KIND = 0,
305265
/* Return values of the PyUnicode_KIND() function: */
306266
PyUnicode_1BYTE_KIND = 1,
307267
PyUnicode_2BYTE_KIND = 2,
@@ -459,27 +419,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
459419
Py_UCS4 maxchar /* maximum code point value in the string */
460420
);
461421

462-
/* Initializes the canonical string representation from the deprecated
463-
wstr/Py_UNICODE representation. This function is used to convert Unicode
464-
objects which were created using the old API to the new flexible format
465-
introduced with PEP 393.
466-
467-
Don't call this function directly, use the public PyUnicode_READY() function
468-
instead. */
469-
PyAPI_FUNC(int) _PyUnicode_Ready(
470-
PyObject *unicode /* Unicode object */
471-
);
472-
473422
/* PyUnicode_READY() used to call _PyUnicode_Ready() when the canonical
474423
representation was not yet set. With this change it only asserts
475424
PyUnicode_IS_READY(op) and no longer calls _PyUnicode_Ready().
476425
Always returns 0 (the former -1 error return can no longer occur). */
477426
static inline int PyUnicode_READY(PyObject *op)
478427
{
479-
if (PyUnicode_IS_READY(op)) {
480-
return 0;
481-
}
482-
return _PyUnicode_Ready(op);
428+
assert(PyUnicode_IS_READY(op));
429+
return 0;
483430
}
484431
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
485432
# define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op))

0 commit comments

Comments
 (0)