Skip to content

Commit 93b9e6b

Browse files
gh-69998: Fix decoding error in locale.nl_langinfo() (GH-124963)
The function now temporarily sets the LC_CTYPE locale to the locale of the category that determines the requested value if the locales are different and the resulting string is non-ASCII. This temporary change affects other threads.
1 parent 2739099 commit 93b9e6b

File tree

5 files changed

+153
-70
lines changed

5 files changed

+153
-70
lines changed

Doc/library/locale.rst

+9
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,15 @@ The :mod:`locale` module defines the following exception and functions:
314314
Get a representation of up to 100 values used to represent the values
315315
0 to 99.
316316

317+
The function temporarily sets the ``LC_CTYPE`` locale to the locale
318+
of the category that determines the requested value (``LC_TIME``,
319+
``LC_NUMERIC``, ``LC_MONETARY`` or ``LC_MESSAGES``) if the locales are
320+
different and the resulting string is non-ASCII.
321+
This temporary change affects other threads.
322+
323+
.. versionchanged:: 3.14
324+
The function now temporarily sets the ``LC_CTYPE`` locale in some cases.
325+
317326

318327
.. function:: getdefaultlocale([envvars])
319328

Doc/whatsnew/3.14.rst

+6
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,12 @@ Changes in the Python API
587587
Wrap it in :func:`staticmethod` if you want to preserve the old behavior.
588588
(Contributed by Serhiy Storchaka and Dominykas Grigonis in :gh:`121027`.)
589589

590+
* The :func:`locale.nl_langinfo` function now temporarily sets the ``LC_CTYPE``
591+
locale in some cases.
592+
This temporary change affects other threads.
593+
(Contributed by Serhiy Storchaka in :gh:`69998`.)
594+
595+
590596
Build Changes
591597
=============
592598

Lib/test/test__locale.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -115,16 +115,17 @@ def numeric_tester(self, calc_type, calc_value, data_type, used_locale):
115115
def test_lc_numeric_nl_langinfo(self):
116116
# Test nl_langinfo against known values
117117
tested = False
118+
oldloc = setlocale(LC_CTYPE)
118119
for loc in candidate_locales:
119120
try:
120121
setlocale(LC_NUMERIC, loc)
121-
setlocale(LC_CTYPE, loc)
122122
except Error:
123123
continue
124124
for li, lc in ((RADIXCHAR, "decimal_point"),
125125
(THOUSEP, "thousands_sep")):
126126
if self.numeric_tester('nl_langinfo', nl_langinfo(li), lc, loc):
127127
tested = True
128+
self.assertEqual(setlocale(LC_CTYPE), oldloc)
128129
if not tested:
129130
self.skipTest('no suitable locales')
130131

@@ -135,28 +136,29 @@ def test_lc_numeric_nl_langinfo(self):
135136
def test_lc_numeric_localeconv(self):
136137
# Test localeconv against known values
137138
tested = False
139+
oldloc = setlocale(LC_CTYPE)
138140
for loc in candidate_locales:
139141
try:
140142
setlocale(LC_NUMERIC, loc)
141-
setlocale(LC_CTYPE, loc)
142143
except Error:
143144
continue
144145
formatting = localeconv()
145146
for lc in ("decimal_point",
146147
"thousands_sep"):
147148
if self.numeric_tester('localeconv', formatting[lc], lc, loc):
148149
tested = True
150+
self.assertEqual(setlocale(LC_CTYPE), oldloc)
149151
if not tested:
150152
self.skipTest('no suitable locales')
151153

152154
@unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
153155
def test_lc_numeric_basic(self):
154156
# Test nl_langinfo against localeconv
155157
tested = False
158+
oldloc = setlocale(LC_CTYPE)
156159
for loc in candidate_locales:
157160
try:
158161
setlocale(LC_NUMERIC, loc)
159-
setlocale(LC_CTYPE, loc)
160162
except Error:
161163
continue
162164
for li, lc in ((RADIXCHAR, "decimal_point"),
@@ -173,17 +175,18 @@ def test_lc_numeric_basic(self):
173175
nl_radixchar, li_radixchar,
174176
loc, set_locale))
175177
tested = True
178+
self.assertEqual(setlocale(LC_CTYPE), oldloc)
176179
if not tested:
177180
self.skipTest('no suitable locales')
178181

179182
def test_float_parsing(self):
180183
# Bug #1391872: Test whether float parsing is okay on European
181184
# locales.
182185
tested = False
186+
oldloc = setlocale(LC_CTYPE)
183187
for loc in candidate_locales:
184188
try:
185189
setlocale(LC_NUMERIC, loc)
186-
setlocale(LC_CTYPE, loc)
187190
except Error:
188191
continue
189192

@@ -199,6 +202,7 @@ def test_float_parsing(self):
199202
self.assertRaises(ValueError, float,
200203
localeconv()['decimal_point'].join(['1', '23']))
201204
tested = True
205+
self.assertEqual(setlocale(LC_CTYPE), oldloc)
202206
if not tested:
203207
self.skipTest('no suitable locales')
204208

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :func:`locale.nl_langinfo` in the case where different categories have
2+
different locales. The function now temporarily sets the ``LC_CTYPE`` locale
3+
in some cases. This temporary change affects other threads.

Modules/_localemodule.c

+127-66
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,17 @@ locale_is_ascii(const char *str)
144144
return (strlen(str) == 1 && ((unsigned char)str[0]) <= 127);
145145
}
146146

147+
static int
148+
is_all_ascii(const char *str)
149+
{
150+
for (; *str; str++) {
151+
if ((unsigned char)*str > 127) {
152+
return 0;
153+
}
154+
}
155+
return 1;
156+
}
157+
147158
static int
148159
locale_decode_monetary(PyObject *dict, struct lconv *lc)
149160
{
@@ -478,113 +489,153 @@ _locale__getdefaultlocale_impl(PyObject *module)
478489
#endif
479490

480491
#ifdef HAVE_LANGINFO_H
481-
#define LANGINFO(X) {#X, X}
492+
#define LANGINFO(X, Y) {#X, X, Y}
482493
static struct langinfo_constant{
483-
char* name;
494+
const char *name;
484495
int value;
496+
int category;
485497
} langinfo_constants[] =
486498
{
487499
/* These constants should exist on any langinfo implementation */
488-
LANGINFO(DAY_1),
489-
LANGINFO(DAY_2),
490-
LANGINFO(DAY_3),
491-
LANGINFO(DAY_4),
492-
LANGINFO(DAY_5),
493-
LANGINFO(DAY_6),
494-
LANGINFO(DAY_7),
495-
496-
LANGINFO(ABDAY_1),
497-
LANGINFO(ABDAY_2),
498-
LANGINFO(ABDAY_3),
499-
LANGINFO(ABDAY_4),
500-
LANGINFO(ABDAY_5),
501-
LANGINFO(ABDAY_6),
502-
LANGINFO(ABDAY_7),
503-
504-
LANGINFO(MON_1),
505-
LANGINFO(MON_2),
506-
LANGINFO(MON_3),
507-
LANGINFO(MON_4),
508-
LANGINFO(MON_5),
509-
LANGINFO(MON_6),
510-
LANGINFO(MON_7),
511-
LANGINFO(MON_8),
512-
LANGINFO(MON_9),
513-
LANGINFO(MON_10),
514-
LANGINFO(MON_11),
515-
LANGINFO(MON_12),
516-
517-
LANGINFO(ABMON_1),
518-
LANGINFO(ABMON_2),
519-
LANGINFO(ABMON_3),
520-
LANGINFO(ABMON_4),
521-
LANGINFO(ABMON_5),
522-
LANGINFO(ABMON_6),
523-
LANGINFO(ABMON_7),
524-
LANGINFO(ABMON_8),
525-
LANGINFO(ABMON_9),
526-
LANGINFO(ABMON_10),
527-
LANGINFO(ABMON_11),
528-
LANGINFO(ABMON_12),
500+
LANGINFO(DAY_1, LC_TIME),
501+
LANGINFO(DAY_2, LC_TIME),
502+
LANGINFO(DAY_3, LC_TIME),
503+
LANGINFO(DAY_4, LC_TIME),
504+
LANGINFO(DAY_5, LC_TIME),
505+
LANGINFO(DAY_6, LC_TIME),
506+
LANGINFO(DAY_7, LC_TIME),
507+
508+
LANGINFO(ABDAY_1, LC_TIME),
509+
LANGINFO(ABDAY_2, LC_TIME),
510+
LANGINFO(ABDAY_3, LC_TIME),
511+
LANGINFO(ABDAY_4, LC_TIME),
512+
LANGINFO(ABDAY_5, LC_TIME),
513+
LANGINFO(ABDAY_6, LC_TIME),
514+
LANGINFO(ABDAY_7, LC_TIME),
515+
516+
LANGINFO(MON_1, LC_TIME),
517+
LANGINFO(MON_2, LC_TIME),
518+
LANGINFO(MON_3, LC_TIME),
519+
LANGINFO(MON_4, LC_TIME),
520+
LANGINFO(MON_5, LC_TIME),
521+
LANGINFO(MON_6, LC_TIME),
522+
LANGINFO(MON_7, LC_TIME),
523+
LANGINFO(MON_8, LC_TIME),
524+
LANGINFO(MON_9, LC_TIME),
525+
LANGINFO(MON_10, LC_TIME),
526+
LANGINFO(MON_11, LC_TIME),
527+
LANGINFO(MON_12, LC_TIME),
528+
529+
LANGINFO(ABMON_1, LC_TIME),
530+
LANGINFO(ABMON_2, LC_TIME),
531+
LANGINFO(ABMON_3, LC_TIME),
532+
LANGINFO(ABMON_4, LC_TIME),
533+
LANGINFO(ABMON_5, LC_TIME),
534+
LANGINFO(ABMON_6, LC_TIME),
535+
LANGINFO(ABMON_7, LC_TIME),
536+
LANGINFO(ABMON_8, LC_TIME),
537+
LANGINFO(ABMON_9, LC_TIME),
538+
LANGINFO(ABMON_10, LC_TIME),
539+
LANGINFO(ABMON_11, LC_TIME),
540+
LANGINFO(ABMON_12, LC_TIME),
529541

530542
#ifdef RADIXCHAR
531543
/* The following are not available with glibc 2.0 */
532-
LANGINFO(RADIXCHAR),
533-
LANGINFO(THOUSEP),
544+
LANGINFO(RADIXCHAR, LC_NUMERIC),
545+
LANGINFO(THOUSEP, LC_NUMERIC),
534546
/* YESSTR and NOSTR are deprecated in glibc, since they are
535547
a special case of message translation, which should be rather
536548
done using gettext. So we don't expose it to Python in the
537549
first place.
538-
LANGINFO(YESSTR),
539-
LANGINFO(NOSTR),
550+
LANGINFO(YESSTR, LC_MESSAGES),
551+
LANGINFO(NOSTR, LC_MESSAGES),
540552
*/
541-
LANGINFO(CRNCYSTR),
553+
LANGINFO(CRNCYSTR, LC_MONETARY),
542554
#endif
543555

544-
LANGINFO(D_T_FMT),
545-
LANGINFO(D_FMT),
546-
LANGINFO(T_FMT),
547-
LANGINFO(AM_STR),
548-
LANGINFO(PM_STR),
556+
LANGINFO(D_T_FMT, LC_TIME),
557+
LANGINFO(D_FMT, LC_TIME),
558+
LANGINFO(T_FMT, LC_TIME),
559+
LANGINFO(AM_STR, LC_TIME),
560+
LANGINFO(PM_STR, LC_TIME),
549561

550562
/* The following constants are available only with XPG4, but...
551563
OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have
552564
a few of the others.
553565
Solution: ifdef-test them all. */
554566
#ifdef CODESET
555-
LANGINFO(CODESET),
567+
LANGINFO(CODESET, LC_CTYPE),
556568
#endif
557569
#ifdef T_FMT_AMPM
558-
LANGINFO(T_FMT_AMPM),
570+
LANGINFO(T_FMT_AMPM, LC_TIME),
559571
#endif
560572
#ifdef ERA
561-
LANGINFO(ERA),
573+
LANGINFO(ERA, LC_TIME),
562574
#endif
563575
#ifdef ERA_D_FMT
564-
LANGINFO(ERA_D_FMT),
576+
LANGINFO(ERA_D_FMT, LC_TIME),
565577
#endif
566578
#ifdef ERA_D_T_FMT
567-
LANGINFO(ERA_D_T_FMT),
579+
LANGINFO(ERA_D_T_FMT, LC_TIME),
568580
#endif
569581
#ifdef ERA_T_FMT
570-
LANGINFO(ERA_T_FMT),
582+
LANGINFO(ERA_T_FMT, LC_TIME),
571583
#endif
572584
#ifdef ALT_DIGITS
573-
LANGINFO(ALT_DIGITS),
585+
LANGINFO(ALT_DIGITS, LC_TIME),
574586
#endif
575587
#ifdef YESEXPR
576-
LANGINFO(YESEXPR),
588+
LANGINFO(YESEXPR, LC_MESSAGES),
577589
#endif
578590
#ifdef NOEXPR
579-
LANGINFO(NOEXPR),
591+
LANGINFO(NOEXPR, LC_MESSAGES),
580592
#endif
581593
#ifdef _DATE_FMT
582594
/* This is not available in all glibc versions that have CODESET. */
583-
LANGINFO(_DATE_FMT),
595+
LANGINFO(_DATE_FMT, LC_TIME),
584596
#endif
585-
{0, 0}
597+
{0, 0, 0}
586598
};
587599

600+
/* Temporarily make the LC_CTYPE locale the same as
601+
* the locale of the specified category. */
602+
static int
603+
change_locale(int category, char **oldloc)
604+
{
605+
/* Keep a copy of the LC_CTYPE locale */
606+
*oldloc = setlocale(LC_CTYPE, NULL);
607+
if (!*oldloc) {
608+
PyErr_SetString(PyExc_RuntimeError, "faild to get LC_CTYPE locale");
609+
return -1;
610+
}
611+
*oldloc = _PyMem_Strdup(*oldloc);
612+
if (!*oldloc) {
613+
PyErr_NoMemory();
614+
return -1;
615+
}
616+
617+
/* Set a new locale if it is different. */
618+
char *loc = setlocale(category, NULL);
619+
if (loc == NULL || strcmp(loc, *oldloc) == 0) {
620+
PyMem_Free(*oldloc);
621+
*oldloc = NULL;
622+
return 0;
623+
}
624+
625+
setlocale(LC_CTYPE, loc);
626+
return 1;
627+
}
628+
629+
/* Restore the old LC_CTYPE locale. */
630+
static void
631+
restore_locale(char *oldloc)
632+
{
633+
if (oldloc != NULL) {
634+
setlocale(LC_CTYPE, oldloc);
635+
PyMem_Free(oldloc);
636+
}
637+
}
638+
588639
/*[clinic input]
589640
_locale.nl_langinfo
590641
@@ -602,14 +653,24 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
602653
/* Check whether this is a supported constant. GNU libc sometimes
603654
returns numeric values in the char* return value, which would
604655
crash PyUnicode_FromString. */
605-
for (i = 0; langinfo_constants[i].name; i++)
656+
for (i = 0; langinfo_constants[i].name; i++) {
606657
if (langinfo_constants[i].value == item) {
607658
/* Check NULL as a workaround for GNU libc's returning NULL
608659
instead of an empty string for nl_langinfo(ERA). */
609660
const char *result = nl_langinfo(item);
610661
result = result != NULL ? result : "";
611-
return PyUnicode_DecodeLocale(result, NULL);
662+
char *oldloc = NULL;
663+
if (langinfo_constants[i].category != LC_CTYPE
664+
&& !is_all_ascii(result)
665+
&& change_locale(langinfo_constants[i].category, &oldloc) < 0)
666+
{
667+
return NULL;
668+
}
669+
PyObject *unicode = PyUnicode_DecodeLocale(result, NULL);
670+
restore_locale(oldloc);
671+
return unicode;
612672
}
673+
}
613674
PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
614675
return NULL;
615676
}

0 commit comments

Comments
 (0)