Skip to content

Commit 0009651

Browse files
[3.12] gh-124969: Make locale.nl_langinfo(locale.ALT_DIGITS) return a string again (GH-125774) (GH-125805)
This is a follow-up to GH-124974. Only Glibc needed a fix. Now the returned value is a string consisting of semicolon-separated symbols on all POSIX platforms. (cherry picked from commit dcc4fb2) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent cea93db commit 0009651

File tree

4 files changed

+52
-30
lines changed

4 files changed

+52
-30
lines changed

Doc/library/locale.rst

+4-3
Original file line number | Diff line number | Diff line change
@@ -158,8 +158,7 @@ The :mod:`locale` module defines the following exception and functions:
158158

159159
.. function:: nl_langinfo(option)
160160

161-
Return some locale-specific information as a string (or a tuple for
162-
``ALT_DIGITS``). This function is not
161+
Return some locale-specific information as a string. This function is not
163162
available on all systems, and the set of possible options might also vary
164163
across platforms. The possible argument values are numbers, for which
165164
symbolic constants are available in the locale module.
@@ -312,7 +311,9 @@ The :mod:`locale` module defines the following exception and functions:
312311

313312
.. data:: ALT_DIGITS
314313

315-
Get a tuple of up to 100 strings used to represent the values 0 to 99.
314+
Get a string consisting of up to 100 semicolon-separated symbols used
315+
to represent the values 0 to 99 in a locale-specific way.
316+
In most locales this is an empty string.
316317

317318

318319
.. function:: getdefaultlocale([envvars])

Lib/test/test__locale.py

+21-9
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,10 @@
2626
'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA',
2727
'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US',
2828
'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro',
29-
'ru_RU.KOI8-R', 'ko_KR.eucKR']
29+
'ru_RU.KOI8-R', 'ko_KR.eucKR',
30+
'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8',
31+
'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620',
32+
]
3033

3134
def setUpModule():
3235
global candidate_locales
@@ -78,11 +81,13 @@ def accept(loc):
7881
'C': (0, {}),
7982
'en_US': (0, {}),
8083
'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}),
81-
'ja_JP': (100, {0: '\u3007', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}),
84+
'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}),
8285
'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}),
8386
'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}),
8487
'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}),
8588
'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}),
89+
'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}),
90+
'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
8691
}
8792

8893
if sys.platform == 'win32':
@@ -196,22 +201,29 @@ def test_lc_numeric_basic(self):
196201
def test_alt_digits_nl_langinfo(self):
197202
# Test nl_langinfo(ALT_DIGITS)
198203
tested = False
199-
for loc, (count, samples) in known_alt_digits.items():
204+
for loc in candidate_locales:
200205
with self.subTest(locale=loc):
201206
try:
202207
setlocale(LC_TIME, loc)
203208
setlocale(LC_CTYPE, loc)
204209
except Error:
205210
self.skipTest(f'no locale {loc!r}')
206211
continue
212+
207213
with self.subTest(locale=loc):
208214
alt_digits = nl_langinfo(locale.ALT_DIGITS)
209-
self.assertIsInstance(alt_digits, tuple)
210-
if count and not alt_digits and support.is_apple:
211-
self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms')
212-
self.assertEqual(len(alt_digits), count)
213-
for i in samples:
214-
self.assertEqual(alt_digits[i], samples[i])
215+
self.assertIsInstance(alt_digits, str)
216+
alt_digits = alt_digits.split(';') if alt_digits else []
217+
if alt_digits:
218+
self.assertGreaterEqual(len(alt_digits), 10, alt_digits)
219+
loc1 = loc.split('.', 1)[0]
220+
if loc1 in known_alt_digits:
221+
count, samples = known_alt_digits[loc1]
222+
if count and not alt_digits:
223+
self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform')
224+
self.assertEqual(len(alt_digits), count, alt_digits)
225+
for i in samples:
226+
self.assertEqual(alt_digits[i], samples[i])
215227
tested = True
216228
if not tested:
217229
self.skipTest('no suitable locales')
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1-
Fix ``locale.nl_langinfo(locale.ALT_DIGITS)``. Now it returns a tuple of up
2-
to 100 strings (an empty tuple on most locales). Previously it returned the
3-
first item of that tuple or an empty string.
1+
Fix ``locale.nl_langinfo(locale.ALT_DIGITS)`` on platforms with glibc.
2+
Now it returns a string consisting of up to 100 semicolon-separated symbols
3+
(an empty string in most locales) on all Posix platforms.
4+
Previously it only returned the first symbol or an empty string.

Modules/_localemodule.c

+23-15
Original file line number | Diff line number | Diff line change
@@ -619,28 +619,36 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
619619
const char *result = nl_langinfo(item);
620620
result = result != NULL ? result : "";
621621
PyObject *pyresult;
622+
#ifdef __GLIBC__
622623
#ifdef ALT_DIGITS
623-
if (item == ALT_DIGITS) {
624-
/* The result is a sequence of up to 100 NUL-separated strings. */
625-
const char *s = result;
624+
if (item == ALT_DIGITS && *result) {
625+
/* According to the POSIX specification the result must be
626+
* a sequence of up to 100 semicolon-separated strings.
627+
* But in Glibc they are NUL-separated. */
628+
Py_ssize_t i = 0;
626629
int count = 0;
627-
for (; count < 100 && *s; count++) {
628-
s += strlen(s) + 1;
630+
for (; count < 100 && result[i]; count++) {
631+
i += strlen(result + i) + 1;
629632
}
630-
pyresult = PyTuple_New(count);
631-
if (pyresult != NULL) {
632-
for (int i = 0; i < count; i++) {
633-
PyObject *unicode = PyUnicode_DecodeLocale(result, NULL);
634-
if (unicode == NULL) {
635-
Py_CLEAR(pyresult);
636-
break;
637-
}
638-
PyTuple_SET_ITEM(pyresult, i, unicode);
639-
result += strlen(result) + 1;
633+
char *buf = PyMem_Malloc(i);
634+
if (buf == NULL) {
635+
PyErr_NoMemory();
636+
pyresult = NULL;
637+
}
638+
else {
639+
memcpy(buf, result, i);
640+
/* Replace all NULs with semicolons. */
641+
i = 0;
642+
while (--count) {
643+
i += strlen(buf + i);
644+
buf[i++] = ';';
640645
}
646+
pyresult = PyUnicode_DecodeLocale(buf, NULL);
647+
PyMem_Free(buf);
641648
}
642649
}
643650
else
651+
#endif
644652
#endif
645653
{
646654
pyresult = PyUnicode_DecodeLocale(result, NULL);

0 commit comments

Comments
 (0)