Skip to content

Commit eb81795

Browse files
authored
bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)
- removes PY_WARN_ON_C_LOCALE build time flag - locale coercion and compatibility warnings are now always compiled in, but are off by default - adds PYTHONCOERCECLOCALE=warn runtime option to aid in debugging potentially locale related compatibility problems Due to not-yet-resolved test failures on *BSD systems (including Mac OS X), this also temporarily disables UTF-8 as a locale coercion target, and skips testing the interpreter's behavior in the POSIX locale.
1 parent 6a98a04 commit eb81795

File tree

5 files changed

+183
-121
lines changed

5 files changed

+183
-121
lines changed

Doc/using/cmdline.rst

+5
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,11 @@ conflict.
744744
:data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
745745
behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
746746

747+
For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause
748+
Python to emit warning messages on ``stderr`` if either the locale coercion
749+
activates, or else if a locale that *would* have triggered coercion is
750+
still active when the Python runtime is initialized.
751+
747752
Availability: \*nix
748753

749754
.. versionadded:: 3.7

Doc/whatsnew/3.7.rst

+5-14
Original file line numberDiff line numberDiff line change
@@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and
9696
``UTF-8``). The default error handler for ``stderr`` continues to be
9797
``backslashreplace``, regardless of locale.
9898

99-
.. note::
100-
101-
In the current implementation, a warning message is printed directly to
102-
``stderr`` even for successful implicit locale coercion. This gives
103-
redistributors and system integrators the opportunity to determine if they
104-
should be making an environmental change to avoid the need for implicit
105-
coercion at the Python interpreter level.
106-
107-
However, it's not clear that this is going to be the best approach for
108-
the final 3.7.0 release, and we may end up deciding to disable the warning
109-
by default and provide some way of opting into it at runtime or build time.
110-
111-
Concrete examples of use cases where it would be preferrable to disable the
112-
warning by default can be noted on :issue:`30565`.
99+
Locale coercion is silent by default, but to assist in debugging potentially
100+
locale related integration problems, explicit warnings (emitted directly on
101+
``stderr``) can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This
102+
setting will also cause the Python runtime to emit a warning if the legacy C
103+
locale remains active when the core interpreter is initialized.
113104

114105
.. seealso::
115106

Lib/test/test_c_locale_coercion.py

+130-78
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,23 @@
2222
else:
2323
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
2424

25-
# XXX (ncoghlan): The above is probably still wrong for:
25+
# Note that the above is probably still wrong in some cases, such as:
2626
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
2727
# * AIX and any other platforms that use latin-1 in the C locale
28+
#
29+
# Options for dealing with this:
30+
# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
31+
# * Fix the test expectations to match the actual platform behaviour
2832

2933
# In order to get the warning messages to match up as expected, the candidate
3034
# order here must match the target locale order in Python/pylifecycle.c
31-
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
35+
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
36+
37+
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
38+
# problems encountered on *BSD systems with those test cases
39+
# For additional details see:
40+
# nl_langinfo CODESET error: https://bugs.python.org/issue30647
41+
# locale handling differences: https://bugs.python.org/issue30672
3242

3343
# There's no reliable cross-platform way of checking locale alias
3444
# lists, so the only way of knowing which of these locales will work
@@ -40,28 +50,39 @@ def _set_locale_in_subprocess(locale_name):
4050
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
4151
return result.rc == 0
4252

43-
_EncodingDetails = namedtuple("EncodingDetails",
44-
"fsencoding stdin_info stdout_info stderr_info")
53+
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
54+
_EncodingDetails = namedtuple("EncodingDetails", _fields)
4555

4656
class EncodingDetails(_EncodingDetails):
57+
# XXX (ncoghlan): Using JSON for child state reporting may be less fragile
4758
CHILD_PROCESS_SCRIPT = ";".join([
48-
"import sys",
59+
"import sys, os",
4960
"print(sys.getfilesystemencoding())",
5061
"print(sys.stdin.encoding + ':' + sys.stdin.errors)",
5162
"print(sys.stdout.encoding + ':' + sys.stdout.errors)",
5263
"print(sys.stderr.encoding + ':' + sys.stderr.errors)",
64+
"print(os.environ.get('LANG', 'not set'))",
65+
"print(os.environ.get('LC_CTYPE', 'not set'))",
66+
"print(os.environ.get('LC_ALL', 'not set'))",
5367
])
5468

5569
@classmethod
56-
def get_expected_details(cls, fs_encoding, stream_encoding):
70+
def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
5771
"""Returns expected child process details for a given encoding"""
5872
_stream = stream_encoding + ":{}"
5973
# stdin and stdout should use surrogateescape either because the
6074
# coercion triggered, or because the C locale was detected
6175
stream_info = 2*[_stream.format("surrogateescape")]
6276
# stderr should always use backslashreplace
6377
stream_info.append(_stream.format("backslashreplace"))
64-
return dict(cls(fs_encoding, *stream_info)._asdict())
78+
expected_lang = env_vars.get("LANG", "not set").lower()
79+
if coercion_expected:
80+
expected_lc_ctype = CLI_COERCION_TARGET.lower()
81+
else:
82+
expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
83+
expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
84+
env_info = expected_lang, expected_lc_ctype, expected_lc_all
85+
return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())
6586

6687
@staticmethod
6788
def _handle_output_variations(data):
@@ -97,64 +118,20 @@ def get_child_details(cls, env_vars):
97118
result.fail(py_cmd)
98119
# All subprocess outputs in this test case should be pure ASCII
99120
adjusted_output = cls._handle_output_variations(result.out)
100-
stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines()
121+
stdout_lines = adjusted_output.decode("ascii").splitlines()
101122
child_encoding_details = dict(cls(*stdout_lines)._asdict())
102123
stderr_lines = result.err.decode("ascii").rstrip().splitlines()
103124
return child_encoding_details, stderr_lines
104125

105126

106-
class _ChildProcessEncodingTestCase(unittest.TestCase):
107-
# Base class to check for expected encoding details in a child process
108-
109-
def _check_child_encoding_details(self,
110-
env_vars,
111-
expected_fs_encoding,
112-
expected_stream_encoding,
113-
expected_warning):
114-
"""Check the C locale handling for the given process environment
115-
116-
Parameters:
117-
expected_fs_encoding: expected sys.getfilesystemencoding() result
118-
expected_stream_encoding: expected encoding for standard streams
119-
expected_warning: stderr output to expect (if any)
120-
"""
121-
result = EncodingDetails.get_child_details(env_vars)
122-
encoding_details, stderr_lines = result
123-
self.assertEqual(encoding_details,
124-
EncodingDetails.get_expected_details(
125-
expected_fs_encoding,
126-
expected_stream_encoding))
127-
self.assertEqual(stderr_lines, expected_warning)
128-
129127
# Details of the shared library warning emitted at runtime
130-
LIBRARY_C_LOCALE_WARNING = (
128+
LEGACY_LOCALE_WARNING = (
131129
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
132130
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
133131
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
134132
"locales is recommended."
135133
)
136134

137-
@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),
138-
"C locale runtime warning disabled at build time")
139-
class LocaleWarningTests(_ChildProcessEncodingTestCase):
140-
# Test warning emitted when running in the C locale
141-
142-
def test_library_c_locale_warning(self):
143-
self.maxDiff = None
144-
for locale_to_set in ("C", "POSIX", "invalid.ascii"):
145-
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the
146-
# POSIX locale, so we skip that for now
147-
if sys.platform == "darwin" and locale_to_set == "POSIX":
148-
continue
149-
var_dict = {
150-
"LC_ALL": locale_to_set
151-
}
152-
with self.subTest(forced_locale=locale_to_set):
153-
self._check_child_encoding_details(var_dict,
154-
C_LOCALE_FS_ENCODING,
155-
C_LOCALE_STREAM_ENCODING,
156-
[LIBRARY_C_LOCALE_WARNING])
157-
158135
# Details of the CLI locale coercion warning emitted at runtime
159136
CLI_COERCION_WARNING_FMT = (
160137
"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
@@ -163,9 +140,13 @@ def test_library_c_locale_warning(self):
163140

164141

165142
AVAILABLE_TARGETS = None
143+
CLI_COERCION_TARGET = None
144+
CLI_COERCION_WARNING = None
166145

167146
def setUpModule():
168147
global AVAILABLE_TARGETS
148+
global CLI_COERCION_TARGET
149+
global CLI_COERCION_WARNING
169150

170151
if AVAILABLE_TARGETS is not None:
171152
# initialization already done
@@ -177,26 +158,57 @@ def setUpModule():
177158
if _set_locale_in_subprocess(target_locale):
178159
AVAILABLE_TARGETS.append(target_locale)
179160

161+
if AVAILABLE_TARGETS:
162+
# Coercion is expected to use the first available target locale
163+
CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
164+
CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)
180165

181166

182-
class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase):
183-
# Base class for test cases that rely on coercion targets being defined
167+
class _LocaleHandlingTestCase(unittest.TestCase):
168+
# Base class to check expected locale handling behaviour
184169

185-
@classmethod
186-
def setUpClass(cls):
187-
if not AVAILABLE_TARGETS:
188-
raise unittest.SkipTest("No C-with-UTF-8 locale available")
170+
def _check_child_encoding_details(self,
171+
env_vars,
172+
expected_fs_encoding,
173+
expected_stream_encoding,
174+
expected_warnings,
175+
coercion_expected):
176+
"""Check the C locale handling for the given process environment
189177
178+
Parameters:
179+
expected_fs_encoding: expected sys.getfilesystemencoding() result
180+
expected_stream_encoding: expected encoding for standard streams
181+
expected_warnings: stderr output to expect (if any)
182+
"""
183+
result = EncodingDetails.get_child_details(env_vars)
184+
encoding_details, stderr_lines = result
185+
expected_details = EncodingDetails.get_expected_details(
186+
coercion_expected,
187+
expected_fs_encoding,
188+
expected_stream_encoding,
189+
env_vars
190+
)
191+
self.assertEqual(encoding_details, expected_details)
192+
if expected_warnings is None:
193+
expected_warnings = []
194+
self.assertEqual(stderr_lines, expected_warnings)
190195

191-
class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
196+
197+
class LocaleConfigurationTests(_LocaleHandlingTestCase):
192198
# Test explicit external configuration via the process environment
193199

200+
def setUpClass():
201+
# This relies on setUpModule() having been run, so it can't be
202+
# handled via the @unittest.skipUnless decorator
203+
if not AVAILABLE_TARGETS:
204+
raise unittest.SkipTest("No C-with-UTF-8 locale available")
205+
194206
def test_external_target_locale_configuration(self):
207+
195208
# Explicitly setting a target locale should give the same behaviour as
196209
# is seen when implicitly coercing to that target locale
197210
self.maxDiff = None
198211

199-
expected_warning = []
200212
expected_fs_encoding = "utf-8"
201213
expected_stream_encoding = "utf-8"
202214

@@ -209,6 +221,7 @@ def test_external_target_locale_configuration(self):
209221
for locale_to_set in AVAILABLE_TARGETS:
210222
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
211223
# expected, so skip that combination for now
224+
# See https://bugs.python.org/issue30672 for discussion
212225
if env_var == "LANG" and locale_to_set == "UTF-8":
213226
continue
214227

@@ -219,17 +232,23 @@ def test_external_target_locale_configuration(self):
219232
self._check_child_encoding_details(var_dict,
220233
expected_fs_encoding,
221234
expected_stream_encoding,
222-
expected_warning)
235+
expected_warnings=None,
236+
coercion_expected=False)
223237

224238

225239

226240
@test.support.cpython_only
227241
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
228242
"C locale coercion disabled at build time")
229-
class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
243+
class LocaleCoercionTests(_LocaleHandlingTestCase):
230244
# Test implicit reconfiguration of the environment during CLI startup
231245

232-
def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale):
246+
def _check_c_locale_coercion(self,
247+
fs_encoding, stream_encoding,
248+
coerce_c_locale,
249+
expected_warnings=None,
250+
coercion_expected=True,
251+
**extra_vars):
233252
"""Check the C locale handling for various configurations
234253
235254
Parameters:
@@ -238,27 +257,31 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
238257
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
239258
None: don't set the variable at all
240259
str: the value set in the child's environment
260+
expected_warnings: expected warning lines on stderr
261+
extra_vars: additional environment variables to set in subprocess
241262
"""
242-
243-
# Check for expected warning on stderr if C locale is coerced
244263
self.maxDiff = None
245264

246-
expected_warning = []
247-
if coerce_c_locale != "0":
248-
# Expect coercion to use the first available locale
249-
warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0])
250-
expected_warning.append(warning_msg)
265+
if not AVAILABLE_TARGETS:
266+
# Locale coercion is disabled when there aren't any target locales
267+
fs_encoding = C_LOCALE_FS_ENCODING
268+
stream_encoding = C_LOCALE_STREAM_ENCODING
269+
coercion_expected = False
270+
if expected_warnings:
271+
expected_warnings = [LEGACY_LOCALE_WARNING]
251272

252273
base_var_dict = {
253274
"LANG": "",
254275
"LC_CTYPE": "",
255276
"LC_ALL": "",
256277
}
278+
base_var_dict.update(extra_vars)
257279
for env_var in ("LANG", "LC_CTYPE"):
258280
for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
259-
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the
281+
# XXX (ncoghlan): *BSD platforms don't behave as expected in the
260282
# POSIX locale, so we skip that for now
261-
if sys.platform == "darwin" and locale_to_set == "POSIX":
283+
# See https://bugs.python.org/issue30672 for discussion
284+
if locale_to_set == "POSIX":
262285
continue
263286
with self.subTest(env_var=env_var,
264287
nominal_locale=locale_to_set,
@@ -267,33 +290,62 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
267290
var_dict[env_var] = locale_to_set
268291
if coerce_c_locale is not None:
269292
var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
293+
# Check behaviour on successful coercion
270294
self._check_child_encoding_details(var_dict,
271295
fs_encoding,
272296
stream_encoding,
273-
expected_warning)
297+
expected_warnings,
298+
coercion_expected)
274299

275300
def test_test_PYTHONCOERCECLOCALE_not_set(self):
276301
# This should coerce to the first available target locale by default
277302
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
278303

279304
def test_PYTHONCOERCECLOCALE_not_zero(self):
280-
# *Any* string other that "0" is considered "set" for our purposes
305+
# *Any* string other than "0" is considered "set" for our purposes
281306
# and hence should result in the locale coercion being enabled
282307
for setting in ("", "1", "true", "false"):
283308
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
284309

310+
def test_PYTHONCOERCECLOCALE_set_to_warn(self):
311+
# PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
312+
self._check_c_locale_coercion("utf-8", "utf-8",
313+
coerce_c_locale="warn",
314+
expected_warnings=[CLI_COERCION_WARNING])
315+
316+
285317
def test_PYTHONCOERCECLOCALE_set_to_zero(self):
286318
# The setting "0" should result in the locale coercion being disabled
287319
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
288320
C_LOCALE_STREAM_ENCODING,
289-
coerce_c_locale="0")
321+
coerce_c_locale="0",
322+
coercion_expected=False)
323+
# Setting LC_ALL=C shouldn't make any difference to the behaviour
324+
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
325+
C_LOCALE_STREAM_ENCODING,
326+
coerce_c_locale="0",
327+
LC_ALL="C",
328+
coercion_expected=False)
290329

330+
def test_LC_ALL_set_to_C(self):
331+
# Setting LC_ALL should render the locale coercion ineffective
332+
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
333+
C_LOCALE_STREAM_ENCODING,
334+
coerce_c_locale=None,
335+
LC_ALL="C",
336+
coercion_expected=False)
337+
# And result in a warning about a lack of locale compatibility
338+
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
339+
C_LOCALE_STREAM_ENCODING,
340+
coerce_c_locale="warn",
341+
LC_ALL="C",
342+
expected_warnings=[LEGACY_LOCALE_WARNING],
343+
coercion_expected=False)
291344

292345
def test_main():
293346
test.support.run_unittest(
294347
LocaleConfigurationTests,
295-
LocaleCoercionTests,
296-
LocaleWarningTests
348+
LocaleCoercionTests
297349
)
298350
test.support.reap_children()
299351

0 commit comments

Comments
 (0)