|
16 | 16 | import _testinternalcapi
|
17 | 17 | except ImportError:
|
18 | 18 | _testinternalcapi = None
|
| 19 | +try: |
| 20 | + import ctypes |
| 21 | +except ImportError: |
| 22 | + ctypes = None |
19 | 23 |
|
20 | 24 |
|
21 | 25 | NULL = None
|
@@ -352,13 +356,13 @@ def test_fromobject(self):
|
352 | 356 | self.assertRaises(TypeError, fromobject, [])
|
353 | 357 | # CRASHES fromobject(NULL)
|
354 | 358 |
|
| 359 | + @unittest.skipIf(ctypes is None, 'need ctypes') |
355 | 360 | def test_from_format(self):
|
356 | 361 | """Test PyUnicode_FromFormat()"""
|
357 | 362 | # Length modifiers "j" and "t" are not tested here because ctypes does
|
358 | 363 | # not expose types for intmax_t and ptrdiff_t.
|
359 | 364 | # _testlimitedcapi.test_string_from_format() has a wider coverage of all
|
360 | 365 | # formats.
|
361 |
| - import_helper.import_module('ctypes') |
362 | 366 | from ctypes import (
|
363 | 367 | c_char_p,
|
364 | 368 | pythonapi, py_object, sizeof,
|
@@ -1676,5 +1680,149 @@ def test_pep393_utf8_caching_bug(self):
|
1676 | 1680 | self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
|
1677 | 1681 |
|
1678 | 1682 |
|
class PyUnicodeWriterTest(unittest.TestCase):
    """Exercise the PyUnicodeWriter C API through _testcapi.PyUnicodeWriter.

    Each test creates a writer, feeds it data through one or more write
    methods and checks the string returned by finish().
    """

    def create_writer(self, size):
        # Build a fresh writer object; *size* is passed straight through to
        # the _testcapi.PyUnicodeWriter constructor.
        return _testcapi.PyUnicodeWriter(size)

    def test_basic(self):
        # Chain one call of each basic write method on a single writer and
        # check the concatenated result.
        writer = self.create_writer(100)

        # test PyUnicodeWriter_WriteUTF8()
        writer.write_utf8(b'var', -1)

        # test PyUnicodeWriter_WriteChar()
        writer.write_char('=')

        # test PyUnicodeWriter_WriteSubstring()
        writer.write_substring("[long]", 1, 5)

        # test PyUnicodeWriter_WriteStr()
        writer.write_str(" value ")

        # test PyUnicodeWriter_WriteRepr()
        writer.write_repr("repr")

        self.assertEqual(writer.finish(),
                         "var=long value 'repr'")

    def test_utf8(self):
        # Write ASCII, a 2-byte and a 3-byte UTF-8 sequence and check that
        # they decode to the expected characters.
        writer = self.create_writer(0)
        writer.write_utf8(b"ascii", -1)
        writer.write_char('-')
        writer.write_utf8(b"latin1=\xC3\xA9", -1)
        writer.write_char('-')
        writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
        writer.write_char('.')
        self.assertEqual(writer.finish(),
                         "ascii-latin1=\xE9-euro=\u20AC.")

    def test_invalid_utf8(self):
        # An invalid byte must be reported as UnicodeDecodeError.
        writer = self.create_writer(0)
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid=\xFF", -1)

    def test_recover_utf8_error(self):
        # test recovering from PyUnicodeWriter_WriteUTF8() error
        writer = self.create_writer(0)
        writer.write_utf8(b"value=", -1)

        # write fails with an invalid string
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid\xFF", -1)

        # retry write with a valid string
        writer.write_utf8(b"valid", -1)

        # Nothing from the failed write may appear in the result.
        self.assertEqual(writer.finish(),
                         "value=valid")

    def test_decode_utf8(self):
        # test PyUnicodeWriter_DecodeUTF8Stateful()
        writer = self.create_writer(0)
        writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
        writer.write_char('-')
        writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence
        writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")

        self.assertEqual(writer.finish(),
                         "ignore-replace\uFFFD-incomplete\uFFFD")

    def test_decode_utf8_consumed(self):
        # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed
        writer = self.create_writer(0)

        # valid string
        consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
        self.assertEqual(consumed, 4)
        writer.write_char('-')

        # non-ASCII
        consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True)
        self.assertEqual(consumed, 6)
        writer.write_char('-')

        # invalid UTF-8 (consumed is 0 on error)
        # The call raises, so no consumed value is returned to check here.
        with self.assertRaises(UnicodeDecodeError):
            writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True)

        # ignore error handler
        consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
        self.assertEqual(consumed, 5)
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence
        consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True)
        self.assertEqual(consumed, 10)

        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")

    def test_widechar(self):
        # Write Latin-1 and BMP text through write_widechar() and check that
        # it round-trips unchanged.
        writer = self.create_writer(0)
        writer.write_widechar("latin1=\xE9")
        writer.write_widechar("-")
        writer.write_widechar("euro=\u20AC")
        writer.write_char('.')
        self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
| 1789 | + |
| 1790 | + |
@unittest.skipIf(ctypes is None, 'need ctypes')
class PyUnicodeWriterFormatTest(unittest.TestCase):
    """Exercise PyUnicodeWriter_Format() by calling it through ctypes.

    The whole class is skipped when the ctypes module is unavailable.
    """

    def create_writer(self, size):
        # Build a fresh writer object; *size* is passed straight through to
        # the _testcapi.PyUnicodeWriter constructor.
        return _testcapi.PyUnicodeWriter(size)

    def writer_format(self, writer, *args):
        # Call PyUnicodeWriter_Format(writer, *args) via ctypes.pythonapi.
        #
        # *args* is the format string (bytes) followed by the values it
        # consumes.  Raises ValueError when the C function returns a
        # negative result.
        from ctypes import c_char_p, pythonapi, c_int, c_void_p
        _PyUnicodeWriter_Format = pythonapi.PyUnicodeWriter_Format
        # Only the two fixed parameters are declared; any further arguments
        # are forwarded exactly as the caller supplied them.
        _PyUnicodeWriter_Format.argtypes = (c_void_p, c_char_p,)
        _PyUnicodeWriter_Format.restype = c_int

        if _PyUnicodeWriter_Format(writer.get_pointer(), *args) < 0:
            raise ValueError("PyUnicodeWriter_Format failed")

    def test_format(self):
        # Format a C string and a C int, then append a character with the
        # regular writer API.
        from ctypes import c_int
        writer = self.create_writer(0)
        self.writer_format(writer, b'%s %i', b'abc', c_int(123))
        writer.write_char('.')
        self.assertEqual(writer.finish(), 'abc 123.')

    def test_recover_error(self):
        # test recovering from PyUnicodeWriter_Format() error
        writer = self.create_writer(0)
        self.writer_format(writer, b"%s ", b"Hello")

        # PyUnicodeWriter_Format() fails with an invalid format string
        with self.assertRaises(ValueError):
            self.writer_format(writer, b"%s\xff", b"World")

        # Retry PyUnicodeWriter_Format() with a valid format string
        self.writer_format(writer, b"%s.", b"World")

        # Nothing from the failed call may appear in the result.
        self.assertEqual(writer.finish(), 'Hello World.')
| 1825 | + |
| 1826 | + |
# Run this module's tests when the file is executed directly as a script.
1679 | 1827 | if __name__ == "__main__":
|
1680 | 1828 | unittest.main()
|
0 commit comments