Skip to content

Commit 374abde

Browse files
authored
gh-104400: pygettext: use an AST parser instead of a tokenizer (GH-104402)
This greatly simplifies the code and fixes many corner cases.
1 parent 1da412e commit 374abde

File tree

7 files changed

+177 additions, -254 deletions

Lib/test/test_tools/i18n_data/docstrings.pot

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,26 +15,40 @@ msgstr ""
1515
"Generated-By: pygettext.py 1.5\n"
1616

1717

18-
#: docstrings.py:7
18+
#: docstrings.py:1
19+
#, docstring
20+
msgid "Module docstring"
21+
msgstr ""
22+
23+
#: docstrings.py:9
1924
#, docstring
2025
msgid ""
2126
msgstr ""
2227

23-
#: docstrings.py:18
28+
#: docstrings.py:15
29+
#, docstring
30+
msgid "docstring"
31+
msgstr ""
32+
33+
#: docstrings.py:20
2434
#, docstring
2535
msgid ""
2636
"multiline\n"
27-
" docstring\n"
28-
" "
37+
"docstring"
2938
msgstr ""
3039

31-
#: docstrings.py:25
40+
#: docstrings.py:27
3241
#, docstring
3342
msgid "docstring1"
3443
msgstr ""
3544

36-
#: docstrings.py:30
45+
#: docstrings.py:38
46+
#, docstring
47+
msgid "nested docstring"
48+
msgstr ""
49+
50+
#: docstrings.py:43
3751
#, docstring
38-
msgid "Hello, {}!"
52+
msgid "nested class docstring"
3953
msgstr ""
4054

Lib/test/test_tools/i18n_data/docstrings.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
"""Module docstring"""
2+
13
# Test docstring extraction
24
from gettext import gettext as _
35

@@ -10,10 +12,10 @@ def test(x):
1012
# Leading empty line
1113
def test2(x):
1214

13-
"""docstring""" # XXX This should be extracted but isn't.
15+
"""docstring"""
1416

1517

16-
# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
18+
# Multiline docstrings are cleaned with `inspect.cleandoc`.
1719
def test3(x):
1820
"""multiline
1921
docstring
@@ -27,15 +29,15 @@ def test4(x):
2729

2830

2931
def test5(x):
30-
"""Hello, {}!""".format("world!") # XXX This should not be extracted.
32+
"""Hello, {}!""".format("world!") # This should not be extracted.
3133

3234

3335
# Nested docstrings
3436
def test6(x):
3537
def inner(y):
36-
"""nested docstring""" # XXX This should be extracted but isn't.
38+
"""nested docstring"""
3739

3840

3941
class Outer:
4042
class Inner:
41-
"nested class docstring" # XXX This should be extracted but isn't.
43+
"nested class docstring"

Lib/test/test_tools/i18n_data/messages.pot

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,22 @@ msgstr ""
1919
msgid ""
2020
msgstr ""
2121

22-
#: messages.py:19 messages.py:20
22+
#: messages.py:19 messages.py:20 messages.py:21
2323
msgid "parentheses"
2424
msgstr ""
2525

26-
#: messages.py:23
26+
#: messages.py:24
2727
msgid "Hello, world!"
2828
msgstr ""
2929

30-
#: messages.py:26
30+
#: messages.py:27
3131
msgid ""
3232
"Hello,\n"
3333
" multiline!\n"
3434
msgstr ""
3535

3636
#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
37-
#: messages.py:99
37+
#: messages.py:99 messages.py:100 messages.py:101
3838
msgid "foo"
3939
msgid_plural "foos"
4040
msgstr[0] ""
@@ -68,22 +68,32 @@ msgstr ""
6868
msgid "set"
6969
msgstr ""
7070

71-
#: messages.py:63
71+
#: messages.py:62 messages.py:63
7272
msgid "nested string"
7373
msgstr ""
7474

7575
#: messages.py:68
7676
msgid "baz"
7777
msgstr ""
7878

79+
#: messages.py:71 messages.py:75
80+
msgid "default value"
81+
msgstr ""
82+
7983
#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
8084
msgctxt "context"
8185
msgid "foo"
8286
msgid_plural "foos"
8387
msgstr[0] ""
8488
msgstr[1] ""
8589

86-
#: messages.py:100
90+
#: messages.py:102
8791
msgid "domain foo"
8892
msgstr ""
8993

94+
#: messages.py:118 messages.py:119
95+
msgid "world"
96+
msgid_plural "worlds"
97+
msgstr[0] ""
98+
msgstr[1] ""
99+

Lib/test/test_tools/i18n_data/messages.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
# Extra parentheses
1919
(_("parentheses"))
2020
((_("parentheses")))
21+
_(("parentheses"))
2122

2223
# Multiline strings
2324
_("Hello, "
@@ -32,23 +33,22 @@
3233
_(None)
3334
_(1)
3435
_(False)
35-
_(("invalid"))
3636
_(["invalid"])
3737
_({"invalid"})
3838
_("string"[3])
3939
_("string"[:3])
4040
_({"string": "foo"})
4141

4242
# pygettext does not allow keyword arguments, but both xgettext and pybabel do
43-
_(x="kwargs work!")
43+
_(x="kwargs are not allowed!")
4444

4545
# Unusual, but valid arguments
4646
_("foo", "bar")
4747
_("something", x="something else")
4848

4949
# .format()
5050
_("Hello, {}!").format("world") # valid
51-
_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string
51+
_("Hello, {}!".format("world")) # invalid, but xgettext extracts the first string
5252

5353
# Nested structures
5454
_("1"), _("2")
@@ -59,7 +59,7 @@
5959

6060
# Nested functions and classes
6161
def test():
62-
_("nested string") # XXX This should be extracted but isn't.
62+
_("nested string")
6363
[_("nested string")]
6464

6565

@@ -68,11 +68,11 @@ def bar(self):
6868
return _("baz")
6969

7070

71-
def bar(x=_('default value')): # XXX This should be extracted but isn't.
71+
def bar(x=_('default value')):
7272
pass
7373

7474

75-
def baz(x=[_('default value')]): # XXX This should be extracted but isn't.
75+
def baz(x=[_('default value')]):
7676
pass
7777

7878

@@ -97,6 +97,8 @@ def _(x="don't extract me"):
9797

9898
# Complex arguments
9999
ngettext("foo", "foos", 42 + (10 - 20))
100+
ngettext("foo", "foos", *args)
101+
ngettext("foo", "foos", **kwargs)
100102
dgettext(["some", {"complex"}, ("argument",)], "domain foo")
101103

102104
# Invalid calls which are not extracted
@@ -108,3 +110,10 @@ def _(x="don't extract me"):
108110
dngettext('domain', 'foo')
109111
dpgettext('domain', 'context')
110112
dnpgettext('domain', 'context', 'foo')
113+
dgettext(*args, 'foo')
114+
dpgettext(*args, 'context', 'foo')
115+
dnpgettext(*args, 'context', 'foo', 'foos')
116+
117+
# f-strings
118+
f"Hello, {_('world')}!"
119+
f"Hello, {ngettext('world', 'worlds', 3)}!"

Lib/test/test_tools/test_i18n.py

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def assert_POT_equal(self, expected, actual):
8787
self.maxDiff = None
8888
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
8989

90-
def extract_from_str(self, module_content, *, args=(), strict=True):
90+
def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False):
9191
"""Return all msgids extracted from module_content."""
9292
filename = 'test.py'
9393
with temp_cwd(None):
@@ -98,12 +98,18 @@ def extract_from_str(self, module_content, *, args=(), strict=True):
9898
self.assertEqual(res.err, b'')
9999
with open('messages.pot', encoding='utf-8') as fp:
100100
data = fp.read()
101-
return self.get_msgids(data)
101+
msgids = self.get_msgids(data)
102+
if not with_stderr:
103+
return msgids
104+
return msgids, res.err
102105

103106
def extract_docstrings_from_str(self, module_content):
104107
"""Return all docstrings extracted from module_content."""
105108
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)
106109

110+
def get_stderr(self, module_content):
111+
return self.extract_from_str(module_content, strict=False, with_stderr=True)[1]
112+
107113
def test_header(self):
108114
"""Make sure the required fields are in the header, according to:
109115
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -407,6 +413,24 @@ def test_files_list(self):
407413
self.assertIn(f'msgid "{text2}"', data)
408414
self.assertNotIn(text3, data)
409415

416+
def test_error_messages(self):
417+
"""Test that pygettext outputs error messages to stderr."""
418+
stderr = self.get_stderr(dedent('''\
419+
_(1+2)
420+
ngettext('foo')
421+
dgettext(*args, 'foo')
422+
'''))
423+
424+
# Normalize line endings on Windows
425+
stderr = stderr.decode('utf-8').replace('\r', '')
426+
427+
self.assertEqual(
428+
stderr,
429+
"*** test.py:1: Expected a string constant for argument 1, got 1 + 2\n"
430+
"*** test.py:2: Expected at least 2 positional argument(s) in gettext call, got 1\n"
431+
"*** test.py:3: Variable positional arguments are not allowed in gettext calls\n"
432+
)
433+
410434

411435
def update_POT_snapshots():
412436
for input_file in DATA_DIR.glob('*.py'):
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fix several bugs in extraction by switching to an AST parser in :program:`pygettext`.

0 commit comments

Comments
 (0)