@@ -1095,7 +1095,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
1095
1095
#define ENC_UTF32LE 4
1096
1096
1097
1097
static int
1098
- get_standard_encoding (const char * encoding , int * bytelength )
1098
+ get_standard_encoding_impl (const char * encoding , int * bytelength )
1099
1099
{
1100
1100
if (Py_TOLOWER (encoding [0 ]) == 'u' &&
1101
1101
Py_TOLOWER (encoding [1 ]) == 't' &&
@@ -1153,172 +1153,212 @@ get_standard_encoding(const char *encoding, int *bytelength)
1153
1153
return ENC_UNKNOWN ;
1154
1154
}
1155
1155
1156
- /* This handler is declared static until someone demonstrates
1157
- a need to call it directly. */
1156
+
1157
+ static int
1158
+ get_standard_encoding (PyObject * encoding , int * code , int * bytelength )
1159
+ {
1160
+ const char * encoding_cstr = PyUnicode_AsUTF8 (encoding );
1161
+ if (encoding_cstr == NULL ) {
1162
+ return -1 ;
1163
+ }
1164
+ * code = get_standard_encoding_impl (encoding_cstr , bytelength );
1165
+ return 0 ;
1166
+ }
1167
+
1168
+
1169
+ // --- handler: 'surrogatepass' -----------------------------------------------
1170
+
1158
1171
/*
 * 'surrogatepass' handling for a UnicodeEncodeError.
 *
 * Every code point in the failing range of the exception's object must be
 * a surrogate; each one is written out in the exception's encoding
 * (UTF-8, UTF-16 LE/BE or UTF-32 LE/BE).  On success a new tuple
 * (bytes, end) is returned.
 *
 * If the encoding is not one of those, or a non-surrogate code point is
 * found in the range, the original exception is set again and NULL is
 * returned.  NULL is also returned when any of the helper calls fails.
 */
static PyObject *
_PyCodec_SurrogatePassUnicodeEncodeError(PyObject *exc)
{
    int enc_code, unit_size;
    PyObject *enc_name = PyUnicodeEncodeError_GetEncoding(exc);
    if (enc_name == NULL) {
        return NULL;
    }
    int status = get_standard_encoding(enc_name, &enc_code, &unit_size);
    Py_DECREF(enc_name);
    if (status < 0) {
        return NULL;
    }
    if (enc_code == ENC_UNKNOWN) {
        /* Not supported, fail with original exception */
        goto reraise;
    }

    /* NOTE(review): 'text' is released below, so it is treated as an owned
       reference; confirm against _PyUnicodeError_GetParams(). */
    PyObject *text;
    Py_ssize_t text_len, first, stop, count;
    if (_PyUnicodeError_GetParams(exc,
                                  &text, &text_len,
                                  &first, &stop, &count, false) < 0)
    {
        return NULL;
    }

    /* Shrink the range so that unit_size * count cannot overflow. */
    const Py_ssize_t max_count = PY_SSIZE_T_MAX / unit_size;
    if (count > max_count) {
        stop = first + max_count;
        stop = Py_MIN(stop, text_len);
        count = Py_MAX(0, stop - first);
    }

    PyObject *encoded = PyBytes_FromStringAndSize(NULL, unit_size * count);
    if (encoded == NULL) {
        Py_DECREF(text);
        return NULL;
    }

    unsigned char *out = (unsigned char *)PyBytes_AsString(encoded);
    for (Py_ssize_t pos = first; pos < stop; pos++) {
        /* object is guaranteed to be "ready" */
        Py_UCS4 ch = PyUnicode_READ_CHAR(text, pos);
        if (!Py_UNICODE_IS_SURROGATE(ch)) {
            /* Not a surrogate, fail with original exception */
            Py_DECREF(text);
            Py_DECREF(encoded);
            goto reraise;
        }
        switch (enc_code) {
        case ENC_UTF8:
            /* three-byte sequence */
            *out++ = (unsigned char)(0xe0 | (ch >> 12));
            *out++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
            *out++ = (unsigned char)(0x80 | (ch & 0x3f));
            break;
        case ENC_UTF16LE:
            *out++ = (unsigned char)ch;
            *out++ = (unsigned char)(ch >> 8);
            break;
        case ENC_UTF16BE:
            *out++ = (unsigned char)(ch >> 8);
            *out++ = (unsigned char)ch;
            break;
        case ENC_UTF32LE:
            *out++ = (unsigned char)ch;
            *out++ = (unsigned char)(ch >> 8);
            *out++ = (unsigned char)(ch >> 16);
            *out++ = (unsigned char)(ch >> 24);
            break;
        case ENC_UTF32BE:
            *out++ = (unsigned char)(ch >> 24);
            *out++ = (unsigned char)(ch >> 16);
            *out++ = (unsigned char)(ch >> 8);
            *out++ = (unsigned char)ch;
            break;
        }
    }

    Py_DECREF(text);
    /* "N" hands the reference to 'encoded' over to the tuple. */
    return Py_BuildValue("(Nn)", encoded, stop);

reraise:
    PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
    return NULL;
}
1261
+
1262
+
1263
+ static PyObject *
1264
+ _PyCodec_SurrogatePassUnicodeDecodeError (PyObject * exc )
1265
+ {
1266
+ PyObject * encoding = PyUnicodeDecodeError_GetEncoding (exc );
1267
+ if (encoding == NULL ) {
1268
+ return NULL ;
1269
+ }
1270
+ int code , bytelength ;
1271
+ int rc = get_standard_encoding (encoding , & code , & bytelength );
1272
+ Py_DECREF (encoding );
1273
+ if (rc < 0 ) {
1274
+ return NULL ;
1275
+ }
1276
+ if (code == ENC_UNKNOWN ) {
1277
+ goto bail ;
1278
+ }
1279
+
1280
+ PyObject * obj ;
1281
+ Py_ssize_t objlen , start , end , slen ;
1282
+ if (_PyUnicodeError_GetParams (exc ,
1283
+ & obj , & objlen ,
1284
+ & start , & end , & slen , true) < 0 )
1285
+ {
1286
+ return NULL ;
1287
+ }
1288
+
1289
+ /* Try decoding a single surrogate character. If
1290
+ there are more, let the codec call us again. */
1291
+ Py_UCS4 ch = 0 ;
1292
+ const unsigned char * p = (const unsigned char * )PyBytes_AS_STRING (obj );
1293
+ p += start ;
1294
+
1295
+ if (objlen - start >= bytelength ) {
1296
+ switch (code ) {
1297
+ case ENC_UTF8 : {
1283
1298
if ((p [0 ] & 0xf0 ) == 0xe0 &&
1284
1299
(p [1 ] & 0xc0 ) == 0x80 &&
1285
- (p [2 ] & 0xc0 ) == 0x80 ) {
1300
+ (p [2 ] & 0xc0 ) == 0x80 )
1301
+ {
1286
1302
/* it's a three-byte code */
1287
- ch = ((p [0 ] & 0x0f ) << 12 ) + ((p [1 ] & 0x3f ) << 6 ) + (p [2 ] & 0x3f );
1303
+ ch = ((p [0 ] & 0x0f ) << 12 ) +
1304
+ ((p [1 ] & 0x3f ) << 6 ) +
1305
+ (p [2 ] & 0x3f );
1288
1306
}
1289
1307
break ;
1290
- case ENC_UTF16LE :
1308
+ }
1309
+ case ENC_UTF16LE : {
1291
1310
ch = p [1 ] << 8 | p [0 ];
1292
1311
break ;
1293
- case ENC_UTF16BE :
1312
+ }
1313
+ case ENC_UTF16BE : {
1294
1314
ch = p [0 ] << 8 | p [1 ];
1295
1315
break ;
1296
- case ENC_UTF32LE :
1316
+ }
1317
+ case ENC_UTF32LE : {
1297
1318
ch = (p [3 ] << 24 ) | (p [2 ] << 16 ) | (p [1 ] << 8 ) | p [0 ];
1298
1319
break ;
1299
- case ENC_UTF32BE :
1320
+ }
1321
+ case ENC_UTF32BE : {
1300
1322
ch = (p [0 ] << 24 ) | (p [1 ] << 16 ) | (p [2 ] << 8 ) | p [3 ];
1301
1323
break ;
1302
1324
}
1303
1325
}
1326
+ }
1327
+ Py_DECREF (obj );
1328
+ if (!Py_UNICODE_IS_SURROGATE (ch )) {
1329
+ goto bail ;
1330
+ }
1304
1331
1305
- Py_DECREF (object );
1306
- if (!Py_UNICODE_IS_SURROGATE (ch )) {
1307
- /* it's not a surrogate - fail */
1308
- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1309
- return NULL ;
1310
- }
1311
- res = PyUnicode_FromOrdinal (ch );
1312
- if (res == NULL )
1313
- return NULL ;
1314
- return Py_BuildValue ("(Nn)" , res , start + bytelength );
1332
+ PyObject * res = PyUnicode_FromOrdinal (ch );
1333
+ if (res == NULL ) {
1334
+ return NULL ;
1335
+ }
1336
+ return Py_BuildValue ("(Nn)" , res , start + bytelength );
1337
+
1338
+ bail :
1339
+ PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1340
+ return NULL ;
1341
+ }
1342
+
1343
+
1344
+ /* This handler is declared static until someone demonstrates
1345
+ a need to call it directly. */
1346
+ static PyObject *
1347
+ PyCodec_SurrogatePassErrors (PyObject * exc )
1348
+ {
1349
+ if (_PyIsUnicodeEncodeError (exc )) {
1350
+ return _PyCodec_SurrogatePassUnicodeEncodeError (exc );
1351
+ }
1352
+ else if (_PyIsUnicodeDecodeError (exc )) {
1353
+ return _PyCodec_SurrogatePassUnicodeDecodeError (exc );
1315
1354
}
1316
1355
else {
1317
1356
wrong_exception_type (exc );
1318
1357
return NULL ;
1319
1358
}
1320
1359
}
1321
1360
1361
+
1322
1362
static PyObject *
1323
1363
PyCodec_SurrogateEscapeErrors (PyObject * exc )
1324
1364
{
@@ -1438,11 +1478,13 @@ namereplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
1438
1478
}
1439
1479
1440
1480
1441
- static PyObject * surrogatepass_errors (PyObject * self , PyObject * exc )
1481
+ static inline PyObject *
1482
+ surrogatepass_errors (PyObject * Py_UNUSED (self ), PyObject * exc )
1442
1483
{
1443
1484
return PyCodec_SurrogatePassErrors (exc );
1444
1485
}
1445
1486
1487
+
1446
1488
static PyObject * surrogateescape_errors (PyObject * self , PyObject * exc )
1447
1489
{
1448
1490
return PyCodec_SurrogateEscapeErrors (exc );
0 commit comments