@@ -30,6 +30,184 @@ use std::os::raw::{c_char, c_int, c_uint, c_void};
30
30
// skipped Py_UNICODE_HIGH_SURROGATE
31
31
// skipped Py_UNICODE_LOW_SURROGATE
32
32
33
+ // generated by bindgen v0.63.0 (with small adaptations)
34
/// Byte-array storage for a C bitfield, with accessors that address individual bits and
/// multi-bit fields.
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` as-is; no bits are changed.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

impl<Storage> BitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Splits a global bit `index` into the index of its byte and the mask of the bit inside
    /// that byte.
    ///
    /// On big-endian targets bits are counted from the most significant bit of each byte,
    /// otherwise from the least significant bit.
    #[inline]
    fn locate(index: usize) -> (usize, u8) {
        let bit_in_byte = if cfg!(target_endian = "big") {
            7 - (index % 8)
        } else {
            index % 8
        };
        (index / 8, 1 << bit_in_byte)
    }

    /// Debug-only validation that the field `[bit_offset, bit_offset + bit_width)` lies
    /// entirely inside the storage.
    ///
    /// The bound is expressed in bits: comparing `(offset + width) / 8` against the byte
    /// length would accept fields that overrun the storage by up to seven bits.
    #[inline]
    fn debug_check_range(&self, bit_offset: usize, bit_width: u8) {
        let storage_len = self.storage.as_ref().len();
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < storage_len);
        debug_assert!(bit_offset + usize::from(bit_width) <= storage_len * 8);
    }

    /// Returns whether the bit at `index` is set.
    ///
    /// Panics if `index` addresses a byte outside the storage.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let (byte_index, mask) = Self::locate(index);
        self.storage.as_ref()[byte_index] & mask != 0
    }

    /// Sets (`val == true`) or clears (`val == false`) the bit at `index`.
    ///
    /// Panics if `index` addresses a byte outside the storage.
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let (byte_index, mask) = Self::locate(index);
        let byte = &mut self.storage.as_mut()[byte_index];
        if val {
            *byte |= mask;
        } else {
            *byte &= !mask;
        }
    }

    /// Reads the `bit_width`-bit field starting at `bit_offset` and returns it as an integer.
    ///
    /// `bit_width` must not exceed 64 and the field must lie inside the storage.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        self.debug_check_range(bit_offset, bit_width);
        let width = usize::from(bit_width);
        let mut val = 0u64;
        for i in 0..width {
            if self.get_bit(bit_offset + i) {
                // On big-endian targets the first storage bit is the field's most
                // significant bit, so the value bit order is mirrored.
                let index = if cfg!(target_endian = "big") {
                    width - 1 - i
                } else {
                    i
                };
                val |= 1 << index;
            }
        }
        val
    }

    /// Writes the low `bit_width` bits of `val` into the field starting at `bit_offset`.
    ///
    /// Bits of `val` above `bit_width` are ignored. `bit_width` must not exceed 64 and the
    /// field must lie inside the storage.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        self.debug_check_range(bit_offset, bit_width);
        let width = usize::from(bit_width);
        for i in 0..width {
            let val_bit_is_set = val & (1 << i) != 0;
            // Mirror of the ordering used in `get`.
            let index = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            self.set_bit(bit_offset + index, val_bit_is_set);
        }
    }
}
118
+
119
+ // generated by bindgen v0.63.0 (with small adaptations)
120
+ // The same code is generated for Python 3.7, 3.8, 3.9, 3.10, and 3.11, but the "ready" field
121
+ // has been removed from Python 3.12.
122
+
123
+ /// Wrapper around the `PyASCIIObject.state` bitfield with getters and setters that work
124
+ /// on most little- and big-endian architectures.
125
+ ///
126
+ /// Memory layout of C bitfields is implementation defined, so these functions are still
127
+ /// unsafe. Users must verify that they work as expected on the architectures they target.
128
+ #[ repr( C ) ]
129
+ #[ repr( align( 4 ) ) ]
130
+ struct PyASCIIObjectState {
131
+ _bitfield_align : [ u8 ; 0 ] ,
132
+ _bitfield : BitfieldUnit < [ u8 ; 4usize ] > ,
133
+ }
134
+
135
+ // c_uint and u32 are not necessarily the same type on all targets / architectures
136
+ #[ allow( clippy:: useless_transmute) ]
137
+ impl PyASCIIObjectState {
138
+ #[ inline]
139
+ unsafe fn interned ( & self ) -> c_uint {
140
+ std:: mem:: transmute ( self . _bitfield . get ( 0usize , 2u8 ) as u32 )
141
+ }
142
+
143
+ #[ inline]
144
+ unsafe fn set_interned ( & mut self , val : c_uint ) {
145
+ let val: u32 = std:: mem:: transmute ( val) ;
146
+ self . _bitfield . set ( 0usize , 2u8 , val as u64 )
147
+ }
148
+
149
+ #[ inline]
150
+ unsafe fn kind ( & self ) -> c_uint {
151
+ std:: mem:: transmute ( self . _bitfield . get ( 2usize , 3u8 ) as u32 )
152
+ }
153
+
154
+ #[ inline]
155
+ unsafe fn set_kind ( & mut self , val : c_uint ) {
156
+ let val: u32 = std:: mem:: transmute ( val) ;
157
+ self . _bitfield . set ( 2usize , 3u8 , val as u64 )
158
+ }
159
+
160
+ #[ inline]
161
+ unsafe fn compact ( & self ) -> c_uint {
162
+ std:: mem:: transmute ( self . _bitfield . get ( 5usize , 1u8 ) as u32 )
163
+ }
164
+
165
+ #[ inline]
166
+ unsafe fn set_compact ( & mut self , val : c_uint ) {
167
+ let val: u32 = std:: mem:: transmute ( val) ;
168
+ self . _bitfield . set ( 5usize , 1u8 , val as u64 )
169
+ }
170
+
171
+ #[ inline]
172
+ unsafe fn ascii ( & self ) -> c_uint {
173
+ std:: mem:: transmute ( self . _bitfield . get ( 6usize , 1u8 ) as u32 )
174
+ }
175
+
176
+ #[ inline]
177
+ unsafe fn set_ascii ( & mut self , val : c_uint ) {
178
+ let val: u32 = std:: mem:: transmute ( val) ;
179
+ self . _bitfield . set ( 6usize , 1u8 , val as u64 )
180
+ }
181
+
182
+ #[ inline]
183
+ unsafe fn ready ( & self ) -> c_uint {
184
+ std:: mem:: transmute ( self . _bitfield . get ( 7usize , 1u8 ) as u32 )
185
+ }
186
+
187
+ #[ inline]
188
+ unsafe fn set_ready ( & mut self , val : c_uint ) {
189
+ let val: u32 = std:: mem:: transmute ( val) ;
190
+ self . _bitfield . set ( 7usize , 1u8 , val as u64 )
191
+ }
192
+ }
193
+
194
+ impl From < u32 > for PyASCIIObjectState {
195
+ #[ inline]
196
+ fn from ( value : u32 ) -> Self {
197
+ PyASCIIObjectState {
198
+ _bitfield_align : [ ] ,
199
+ _bitfield : BitfieldUnit :: new ( value. to_ne_bytes ( ) ) ,
200
+ }
201
+ }
202
+ }
203
+
204
+ impl From < PyASCIIObjectState > for u32 {
205
+ #[ inline]
206
+ fn from ( value : PyASCIIObjectState ) -> Self {
207
+ u32:: from_ne_bytes ( value. _bitfield . storage )
208
+ }
209
+ }
210
+
33
211
#[ repr( C ) ]
34
212
pub struct PyASCIIObject {
35
213
pub ob_base : PyObject ,
@@ -52,34 +230,98 @@ pub struct PyASCIIObject {
52
230
}
53
231
54
232
/// Interacting with the bitfield is not actually well-defined, so we mark these APIs unsafe.
55
- ///
56
- /// In addition, they are disabled on big-endian architectures to restrict this to most "common"
57
- /// platforms, which are at least tested on CI and appear to be sound.
58
- #[ cfg( target_endian = "little" ) ]
59
233
impl PyASCIIObject {
234
+ /// Get the `interned` field of the [`PyASCIIObject`] state bitfield.
235
+ ///
236
+ /// Returns one of: [`SSTATE_NOT_INTERNED`], [`SSTATE_INTERNED_MORTAL`], [`SSTATE_INTERNED_IMMORTAL`]
60
237
#[ inline]
61
238
pub unsafe fn interned ( & self ) -> c_uint {
62
- self . state & 3
239
+ PyASCIIObjectState :: from ( self . state ) . interned ( )
63
240
}
64
241
242
+ /// Set the `interned` field of the [`PyASCIIObject`] state bitfield.
243
+ ///
244
+ /// Calling this function with an argument that is not [`SSTATE_NOT_INTERNED`],
245
+ /// [`SSTATE_INTERNED_MORTAL`], or [`SSTATE_INTERNED_IMMORTAL`] is invalid.
246
+ #[ inline]
247
+ pub unsafe fn set_interned ( & mut self , val : c_uint ) {
248
+ let mut state = PyASCIIObjectState :: from ( self . state ) ;
249
+ state. set_interned ( val) ;
250
+ self . state = u32:: from ( state) ;
251
+ }
252
+
253
+ /// Get the `kind` field of the [`PyASCIIObject`] state bitfield.
254
+ ///
255
+ /// Returns one of: [`PyUnicode_WCHAR_KIND`], [`PyUnicode_1BYTE_KIND`], [`PyUnicode_2BYTE_KIND`],
256
+ /// [`PyUnicode_4BYTE_KIND`]
65
257
#[ inline]
66
258
pub unsafe fn kind ( & self ) -> c_uint {
67
- ( self . state >> 2 ) & 7
259
+ PyASCIIObjectState :: from ( self . state ) . kind ( )
68
260
}
69
261
262
+ /// Set the `kind` field of the [`PyASCIIObject`] state bitfield.
263
+ ///
264
+ /// Calling this function with an argument that is not [`PyUnicode_WCHAR_KIND`], [`PyUnicode_1BYTE_KIND`],
265
+ /// [`PyUnicode_2BYTE_KIND`], or [`PyUnicode_4BYTE_KIND`] is invalid.
266
+ #[ inline]
267
+ pub unsafe fn set_kind ( & mut self , val : c_uint ) {
268
+ let mut state = PyASCIIObjectState :: from ( self . state ) ;
269
+ state. set_kind ( val) ;
270
+ self . state = u32:: from ( state) ;
271
+ }
272
+
273
+ /// Get the `compact` field of the [`PyASCIIObject`] state bitfield.
274
+ ///
275
+ /// Returns either `0` or `1`.
70
276
#[ inline]
71
277
pub unsafe fn compact ( & self ) -> c_uint {
72
- ( self . state >> 5 ) & 1
278
+ PyASCIIObjectState :: from ( self . state ) . compact ( )
279
+ }
280
+
281
+ /// Set the `compact` flag of the [`PyASCIIObject`] state bitfield.
282
+ ///
283
+ /// Calling this function with an argument that is neither `0` nor `1` is invalid.
284
+ #[ inline]
285
+ pub unsafe fn set_compact ( & mut self , val : c_uint ) {
286
+ let mut state = PyASCIIObjectState :: from ( self . state ) ;
287
+ state. set_compact ( val) ;
288
+ self . state = u32:: from ( state) ;
73
289
}
74
290
291
+ /// Get the `ascii` field of the [`PyASCIIObject`] state bitfield.
292
+ ///
293
+ /// Returns either `0` or `1`.
75
294
#[ inline]
76
295
pub unsafe fn ascii ( & self ) -> c_uint {
77
- ( self . state >> 6 ) & 1
296
+ PyASCIIObjectState :: from ( self . state ) . ascii ( )
78
297
}
79
298
299
+ /// Set the `ascii` flag of the [`PyASCIIObject`] state bitfield.
300
+ ///
301
+ /// Calling this function with an argument that is neither `0` nor `1` is invalid.
302
+ #[ inline]
303
+ pub unsafe fn set_ascii ( & mut self , val : c_uint ) {
304
+ let mut state = PyASCIIObjectState :: from ( self . state ) ;
305
+ state. set_ascii ( val) ;
306
+ self . state = u32:: from ( state) ;
307
+ }
308
+
309
+ /// Get the `ready` field of the [`PyASCIIObject`] state bitfield.
310
+ ///
311
+ /// Returns either `0` or `1`.
80
312
#[ inline]
81
313
pub unsafe fn ready ( & self ) -> c_uint {
82
- ( self . state >> 7 ) & 1
314
+ PyASCIIObjectState :: from ( self . state ) . ready ( )
315
+ }
316
+
317
+ /// Set the `ready` flag of the [`PyASCIIObject`] state bitfield.
318
+ ///
319
+ /// Calling this function with an argument that is neither `0` nor `1` is invalid.
320
+ #[ inline]
321
+ pub unsafe fn set_ready ( & mut self , val : c_uint ) {
322
+ let mut state = PyASCIIObjectState :: from ( self . state ) ;
323
+ state. set_ready ( val) ;
324
+ self . state = u32:: from ( state) ;
83
325
}
84
326
}
85
327
@@ -120,7 +362,6 @@ pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
120
362
/// One of the values the `interned` field of the `PyASCIIObject` state bitfield may hold.
pub const SSTATE_INTERNED_IMMORTAL : c_uint = 2 ;
121
363
122
364
#[ inline]
123
- #[ cfg( target_endian = "little" ) ]
124
365
pub unsafe fn PyUnicode_IS_ASCII ( op : * mut PyObject ) -> c_uint {
125
366
debug_assert ! ( crate :: PyUnicode_Check ( op) != 0 ) ;
126
367
debug_assert ! ( PyUnicode_IS_READY ( op) != 0 ) ;
@@ -129,13 +370,11 @@ pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
129
370
}
130
371
131
372
#[ inline]
132
- #[ cfg( target_endian = "little" ) ]
133
373
pub unsafe fn PyUnicode_IS_COMPACT ( op : * mut PyObject ) -> c_uint {
134
374
( * ( op as * mut PyASCIIObject ) ) . compact ( )
135
375
}
136
376
137
377
#[ inline]
138
- #[ cfg( target_endian = "little" ) ]
139
378
pub unsafe fn PyUnicode_IS_COMPACT_ASCII ( op : * mut PyObject ) -> c_uint {
140
379
( ( * ( op as * mut PyASCIIObject ) ) . ascii ( ) != 0 && PyUnicode_IS_COMPACT ( op) != 0 ) . into ( )
141
380
}
@@ -149,25 +388,21 @@ pub const PyUnicode_2BYTE_KIND: c_uint = 2;
149
388
/// One of the values the `kind` field of the `PyASCIIObject` state bitfield may hold.
pub const PyUnicode_4BYTE_KIND : c_uint = 4 ;
150
389
151
390
#[ inline]
152
- #[ cfg( target_endian = "little" ) ]
153
391
pub unsafe fn PyUnicode_1BYTE_DATA ( op : * mut PyObject ) -> * mut Py_UCS1 {
154
392
PyUnicode_DATA ( op) as * mut Py_UCS1
155
393
}
156
394
157
395
#[ inline]
158
- #[ cfg( target_endian = "little" ) ]
159
396
pub unsafe fn PyUnicode_2BYTE_DATA ( op : * mut PyObject ) -> * mut Py_UCS2 {
160
397
PyUnicode_DATA ( op) as * mut Py_UCS2
161
398
}
162
399
163
400
#[ inline]
164
- #[ cfg( target_endian = "little" ) ]
165
401
pub unsafe fn PyUnicode_4BYTE_DATA ( op : * mut PyObject ) -> * mut Py_UCS4 {
166
402
PyUnicode_DATA ( op) as * mut Py_UCS4
167
403
}
168
404
169
405
#[ inline]
170
- #[ cfg( target_endian = "little" ) ]
171
406
pub unsafe fn PyUnicode_KIND ( op : * mut PyObject ) -> c_uint {
172
407
debug_assert ! ( crate :: PyUnicode_Check ( op) != 0 ) ;
173
408
debug_assert ! ( PyUnicode_IS_READY ( op) != 0 ) ;
@@ -176,7 +411,6 @@ pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
176
411
}
177
412
178
413
#[ inline]
179
- #[ cfg( target_endian = "little" ) ]
180
414
pub unsafe fn _PyUnicode_COMPACT_DATA ( op : * mut PyObject ) -> * mut c_void {
181
415
if PyUnicode_IS_ASCII ( op) != 0 {
182
416
( op as * mut PyASCIIObject ) . offset ( 1 ) as * mut c_void
@@ -186,15 +420,13 @@ pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
186
420
}
187
421
188
422
#[ inline]
189
- #[ cfg( target_endian = "little" ) ]
190
423
pub unsafe fn _PyUnicode_NONCOMPACT_DATA ( op : * mut PyObject ) -> * mut c_void {
191
424
debug_assert ! ( !( * ( op as * mut PyUnicodeObject ) ) . data. any. is_null( ) ) ;
192
425
193
426
( * ( op as * mut PyUnicodeObject ) ) . data . any
194
427
}
195
428
196
429
#[ inline]
197
- #[ cfg( target_endian = "little" ) ]
198
430
pub unsafe fn PyUnicode_DATA ( op : * mut PyObject ) -> * mut c_void {
199
431
debug_assert ! ( crate :: PyUnicode_Check ( op) != 0 ) ;
200
432
@@ -210,7 +442,6 @@ pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
210
442
// skipped PyUnicode_READ_CHAR
211
443
212
444
#[ inline]
213
- #[ cfg( target_endian = "little" ) ]
214
445
pub unsafe fn PyUnicode_GET_LENGTH ( op : * mut PyObject ) -> Py_ssize_t {
215
446
debug_assert ! ( crate :: PyUnicode_Check ( op) != 0 ) ;
216
447
debug_assert ! ( PyUnicode_IS_READY ( op) != 0 ) ;
@@ -219,15 +450,13 @@ pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
219
450
}
220
451
221
452
#[ inline]
222
- #[ cfg( target_endian = "little" ) ]
223
453
pub unsafe fn PyUnicode_IS_READY ( op : * mut PyObject ) -> c_uint {
224
454
( * ( op as * mut PyASCIIObject ) ) . ready ( )
225
455
}
226
456
227
457
#[ cfg( not( Py_3_12 ) ) ]
228
458
#[ cfg_attr( Py_3_10 , deprecated( note = "Python 3.10" ) ) ]
229
459
#[ inline]
230
- #[ cfg( target_endian = "little" ) ]
231
460
pub unsafe fn PyUnicode_READY ( op : * mut PyObject ) -> c_int {
232
461
debug_assert ! ( crate :: PyUnicode_Check ( op) != 0 ) ;
233
462
0 commit comments