Skip to content

Commit 2ecf9c2

Browse files
vstinner authored and estyxx committed
pythongh-119182: Rewrite PyUnicodeWriter tests in Python (python#120845)
1 parent e94c9db commit 2ecf9c2

File tree

2 files changed

+336
-262
lines changed

2 files changed

+336
-262
lines changed

Lib/test/test_capi/test_unicode.py

+149-1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,10 @@
1616
import _testinternalcapi
1717
except ImportError:
1818
_testinternalcapi = None
19+
try:
20+
import ctypes
21+
except ImportError:
22+
ctypes = None
1923

2024

2125
NULL = None
@@ -352,13 +356,13 @@ def test_fromobject(self):
352356
self.assertRaises(TypeError, fromobject, [])
353357
# CRASHES fromobject(NULL)
354358

359+
@unittest.skipIf(ctypes is None, 'need ctypes')
355360
def test_from_format(self):
356361
"""Test PyUnicode_FromFormat()"""
357362
# Length modifiers "j" and "t" are not tested here because ctypes does
358363
# not expose types for intmax_t and ptrdiff_t.
359364
# _testlimitedcapi.test_string_from_format() has a wider coverage of all
360365
# formats.
361-
import_helper.import_module('ctypes')
362366
from ctypes import (
363367
c_char_p,
364368
pythonapi, py_object, sizeof,
@@ -1676,5 +1680,149 @@ def test_pep393_utf8_caching_bug(self):
16761680
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
16771681

16781682

1683+
class PyUnicodeWriterTest(unittest.TestCase):
    """Tests for the PyUnicodeWriter C API, driven from Python.

    The tests call methods of the ``_testcapi.PyUnicodeWriter`` wrapper
    object; most of them then compare the string returned by ``finish()``
    with the expected concatenation of everything that was written.
    """

    def create_writer(self, size):
        """Return a new ``_testcapi.PyUnicodeWriter`` created with *size*."""
        return _testcapi.PyUnicodeWriter(size)

    def test_basic(self):
        # Chain one call to each basic write method on a single writer and
        # check the combined result.
        writer = self.create_writer(100)

        # test PyUnicodeWriter_WriteUTF8()
        writer.write_utf8(b'var', -1)

        # test PyUnicodeWriter_WriteChar()
        writer.write_char('=')

        # test PyUnicodeWriter_WriteSubstring()
        writer.write_substring("[long]", 1, 5)

        # test PyUnicodeWriter_WriteStr()
        writer.write_str(" value ")

        # test PyUnicodeWriter_WriteRepr()
        writer.write_repr("repr")

        self.assertEqual(writer.finish(),
                         "var=long value 'repr'")

    def test_utf8(self):
        # Write ASCII, then a 2-byte and a 3-byte UTF-8 sequence, and check
        # that they are decoded to U+00E9 and U+20AC respectively.
        writer = self.create_writer(0)
        writer.write_utf8(b"ascii", -1)
        writer.write_char('-')
        writer.write_utf8(b"latin1=\xC3\xA9", -1)
        writer.write_char('-')
        writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
        writer.write_char('.')
        self.assertEqual(writer.finish(),
                         "ascii-latin1=\xE9-euro=\u20AC.")

    def test_invalid_utf8(self):
        # An invalid byte (0xFF) must be reported as UnicodeDecodeError.
        writer = self.create_writer(0)
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid=\xFF", -1)

    def test_recover_utf8_error(self):
        # test recovering from PyUnicodeWriter_WriteUTF8() error
        writer = self.create_writer(0)
        writer.write_utf8(b"value=", -1)

        # write fails with an invalid string
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid\xFF", -1)

        # retry write with a valid string
        writer.write_utf8(b"valid", -1)

        # Nothing from the failed write may show up in the result.
        self.assertEqual(writer.finish(),
                         "value=valid")

    def test_decode_utf8(self):
        # test PyUnicodeWriter_DecodeUTF8Stateful()
        writer = self.create_writer(0)
        writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
        writer.write_char('-')
        writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence
        writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")

        self.assertEqual(writer.finish(),
                         "ignore-replace\uFFFD-incomplete\uFFFD")

    def test_decode_utf8_consumed(self):
        # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed
        writer = self.create_writer(0)

        # valid string
        consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
        self.assertEqual(consumed, 4)
        writer.write_char('-')

        # non-ASCII: 2 + 1 + 3 bytes, passed with an explicit size of 6
        consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6,
                                             b"strict", True)
        self.assertEqual(consumed, 6)
        writer.write_char('-')

        # invalid UTF-8 (consumed is 0 on error)
        with self.assertRaises(UnicodeDecodeError):
            writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True)

        # ignore error handler: the dropped 0xFF byte still counts as
        # consumed (5 bytes in, 5 consumed)
        consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
        self.assertEqual(consumed, 5)
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence: the lone 0xC3 lead byte is
        # not consumed (11 bytes in, 10 consumed)
        consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1,
                                             b"ignore", True)
        self.assertEqual(consumed, 10)

        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")

    def test_widechar(self):
        # Write str arguments through write_widechar(), including non-ASCII
        # characters, and check that they come back unchanged.
        writer = self.create_writer(0)
        writer.write_widechar("latin1=\xE9")
        writer.write_widechar("-")
        writer.write_widechar("euro=\u20AC")
        writer.write_char('.')
        self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
1789+
1790+
1791+
@unittest.skipIf(ctypes is None, 'need ctypes')
class PyUnicodeWriterFormatTest(unittest.TestCase):
    """Tests for PyUnicodeWriter_Format(), called through ctypes.

    The whole class is skipped when the ctypes module is unavailable.
    """

    def create_writer(self, size):
        """Return a new ``_testcapi.PyUnicodeWriter`` created with *size*."""
        new_writer = _testcapi.PyUnicodeWriter(size)
        return new_writer

    def writer_format(self, writer, *args):
        """Call PyUnicodeWriter_Format() on *writer* with *args*.

        *args* is the format string followed by its arguments.  The writer
        is passed to the C function as the pointer returned by
        ``writer.get_pointer()``.

        Raises ValueError if the C function returns a negative value.
        """
        from ctypes import c_char_p, pythonapi, c_int, c_void_p

        format_func = pythonapi.PyUnicodeWriter_Format
        format_func.restype = c_int
        # Only the two fixed parameters are declared; extra arguments are
        # passed through as given by the caller.
        format_func.argtypes = (c_void_p, c_char_p)

        status = format_func(writer.get_pointer(), *args)
        if status < 0:
            raise ValueError("PyUnicodeWriter_Format failed")

    def test_format(self):
        # Format a C string and a C int, then append one more character.
        from ctypes import c_int

        writer = self.create_writer(0)
        number = c_int(123)
        self.writer_format(writer, b'%s %i', b'abc', number)
        writer.write_char('.')

        result = writer.finish()
        self.assertEqual(result, 'abc 123.')

    def test_recover_error(self):
        # test recovering from PyUnicodeWriter_Format() error
        writer = self.create_writer(0)
        self.writer_format(writer, b"%s ", b"Hello")

        # PyUnicodeWriter_Format() fails with an invalid format string
        bad_format = b"%s\xff"
        with self.assertRaises(ValueError):
            self.writer_format(writer, bad_format, b"World")

        # Retry PyUnicodeWriter_Format() with a valid format string
        good_format = b"%s."
        self.writer_format(writer, good_format, b"World")

        result = writer.finish()
        self.assertEqual(result, 'Hello World.')
1825+
1826+
16791827
if __name__ == "__main__":
16801828
unittest.main()

0 commit comments

Comments
 (0)