@@ -374,6 +374,119 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
374
374
}
375
375
376
376
377
+ static PyObject *
378
+ test_unicodewriter_decode_utf8 (PyObject * self , PyObject * Py_UNUSED (args ))
379
+ {
380
+ // test PyUnicodeWriter_DecodeUTF8Stateful()
381
+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
382
+ if (writer == NULL ) {
383
+ return NULL ;
384
+ }
385
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "ign\xFFore" , -1 , "ignore" , NULL ) < 0 ) {
386
+ goto error ;
387
+ }
388
+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
389
+ goto error ;
390
+ }
391
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "replace\xFF" , -1 , "replace" , NULL ) < 0 ) {
392
+ goto error ;
393
+ }
394
+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
395
+ goto error ;
396
+ }
397
+
398
+ // incomplete trailing UTF-8 sequence
399
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "incomplete\xC3" , -1 , "replace" , NULL ) < 0 ) {
400
+ goto error ;
401
+ }
402
+
403
+ PyObject * result = PyUnicodeWriter_Finish (writer );
404
+ if (result == NULL ) {
405
+ return NULL ;
406
+ }
407
+ assert (PyUnicode_EqualToUTF8 (result ,
408
+ "ignore-replace\xef\xbf\xbd"
409
+ "-incomplete\xef\xbf\xbd" ));
410
+ Py_DECREF (result );
411
+
412
+ Py_RETURN_NONE ;
413
+
414
+ error :
415
+ PyUnicodeWriter_Discard (writer );
416
+ return NULL ;
417
+ }
418
+
419
+
420
+ static PyObject *
421
+ test_unicodewriter_decode_utf8_consumed (PyObject * self , PyObject * Py_UNUSED (args ))
422
+ {
423
+ // test PyUnicodeWriter_DecodeUTF8Stateful()
424
+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
425
+ if (writer == NULL ) {
426
+ return NULL ;
427
+ }
428
+ Py_ssize_t consumed ;
429
+
430
+ // valid string
431
+ consumed = 12345 ;
432
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "text" , -1 , NULL , & consumed ) < 0 ) {
433
+ goto error ;
434
+ }
435
+ assert (consumed == 4 );
436
+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
437
+ goto error ;
438
+ }
439
+
440
+ // non-ASCII
441
+ consumed = 12345 ;
442
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "\xC3\xA9-\xE2\x82\xAC" , 6 , NULL , & consumed ) < 0 ) {
443
+ goto error ;
444
+ }
445
+ assert (consumed == 6 );
446
+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
447
+ goto error ;
448
+ }
449
+
450
+ // consumed is 0 if write fails
451
+ consumed = 12345 ;
452
+ assert (PyUnicodeWriter_DecodeUTF8Stateful (writer , "invalid\xFF" , -1 , NULL , & consumed ) < 0 );
453
+ PyErr_Clear ();
454
+ assert (consumed == 0 );
455
+
456
+ // ignore error handler
457
+ consumed = 12345 ;
458
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "more\xFF" , -1 , "ignore" , & consumed ) < 0 ) {
459
+ goto error ;
460
+ }
461
+ assert (consumed == 5 );
462
+ if (PyUnicodeWriter_WriteChar (writer , '-' ) < 0 ) {
463
+ goto error ;
464
+ }
465
+
466
+ // incomplete trailing UTF-8 sequence
467
+ consumed = 12345 ;
468
+ if (PyUnicodeWriter_DecodeUTF8Stateful (writer , "incomplete\xC3" , -1 , "ignore" , & consumed ) < 0 ) {
469
+ goto error ;
470
+ }
471
+ assert (consumed == 10 );
472
+
473
+ PyObject * result = PyUnicodeWriter_Finish (writer );
474
+ if (result == NULL ) {
475
+ return NULL ;
476
+ }
477
+ assert (PyUnicode_EqualToUTF8 (result ,
478
+ "text-\xC3\xA9-\xE2\x82\xAC-"
479
+ "more-incomplete" ));
480
+ Py_DECREF (result );
481
+
482
+ Py_RETURN_NONE ;
483
+
484
+ error :
485
+ PyUnicodeWriter_Discard (writer );
486
+ return NULL ;
487
+ }
488
+
489
+
377
490
static PyObject *
378
491
test_unicodewriter_format (PyObject * self , PyObject * Py_UNUSED (args ))
379
492
{
@@ -436,6 +549,42 @@ test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args
436
549
}
437
550
438
551
552
+ static PyObject *
553
+ test_unicodewriter_widechar (PyObject * self , PyObject * Py_UNUSED (args ))
554
+ {
555
+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
556
+ if (writer == NULL ) {
557
+ return NULL ;
558
+ }
559
+ if (PyUnicodeWriter_WriteWideChar (writer , L"latin1=\xE9 IGNORED" , 8 ) < 0 ) {
560
+ goto error ;
561
+ }
562
+ if (PyUnicodeWriter_WriteWideChar (writer , L"-" , 1 ) < 0 ) {
563
+ goto error ;
564
+ }
565
+ if (PyUnicodeWriter_WriteWideChar (writer , L"euro=\u20AC" , -1 ) < 0 ) {
566
+ goto error ;
567
+ }
568
+ if (PyUnicodeWriter_WriteChar (writer , '.' ) < 0 ) {
569
+ goto error ;
570
+ }
571
+
572
+ PyObject * result = PyUnicodeWriter_Finish (writer );
573
+ if (result == NULL ) {
574
+ return NULL ;
575
+ }
576
+ assert (PyUnicode_EqualToUTF8 (result ,
577
+ "latin1=\xC3\xA9-euro=\xE2\x82\xAC." ));
578
+ Py_DECREF (result );
579
+
580
+ Py_RETURN_NONE ;
581
+
582
+ error :
583
+ PyUnicodeWriter_Discard (writer );
584
+ return NULL ;
585
+ }
586
+
587
+
439
588
static PyMethodDef TestMethods [] = {
440
589
{"unicode_new" , unicode_new , METH_VARARGS },
441
590
{"unicode_fill" , unicode_fill , METH_VARARGS },
@@ -448,8 +597,11 @@ static PyMethodDef TestMethods[] = {
448
597
{"test_unicodewriter_utf8" , test_unicodewriter_utf8 , METH_NOARGS },
449
598
{"test_unicodewriter_invalid_utf8" , test_unicodewriter_invalid_utf8 , METH_NOARGS },
450
599
{"test_unicodewriter_recover_error" , test_unicodewriter_recover_error , METH_NOARGS },
600
+ {"test_unicodewriter_decode_utf8" , test_unicodewriter_decode_utf8 , METH_NOARGS },
601
+ {"test_unicodewriter_decode_utf8_consumed" , test_unicodewriter_decode_utf8_consumed , METH_NOARGS },
451
602
{"test_unicodewriter_format" , test_unicodewriter_format , METH_NOARGS },
452
603
{"test_unicodewriter_format_recover_error" , test_unicodewriter_format_recover_error , METH_NOARGS },
604
+ {"test_unicodewriter_widechar" , test_unicodewriter_widechar , METH_NOARGS },
453
605
{NULL },
454
606
};
455
607
0 commit comments