22
22
else :
23
23
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
24
24
25
- # XXX (ncoghlan): The above is probably still wrong for :
25
+ # Note that the above is probably still wrong in some cases, such as :
26
26
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
27
27
# * AIX and any other platforms that use latin-1 in the C locale
28
+ #
29
+ # Options for dealing with this:
30
+ # * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
31
+ # * Fix the test expectations to match the actual platform behaviour
28
32
29
33
# In order to get the warning messages to match up as expected, the candidate
30
34
# order here must match the target locale order in Python/pylifecycle.c
31
- _C_UTF8_LOCALES = ("C.UTF-8" , "C.utf8" , "UTF-8" )
35
+ _C_UTF8_LOCALES = ("C.UTF-8" , "C.utf8" ) #, "UTF-8")
36
+
37
+ # XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
38
+ # problems encountered on *BSD systems with those test cases
39
+ # For additional details see:
40
+ # nl_langinfo CODESET error: https://bugs.python.org/issue30647
41
+ # locale handling differences: https://bugs.python.org/issue30672
32
42
33
43
# There's no reliable cross-platform way of checking locale alias
34
44
# lists, so the only way of knowing which of these locales will work
@@ -40,28 +50,39 @@ def _set_locale_in_subprocess(locale_name):
40
50
result , py_cmd = run_python_until_end ("-c" , cmd , __isolated = True )
41
51
return result .rc == 0
42
52
43
- _EncodingDetails = namedtuple ( "EncodingDetails" ,
44
- "fsencoding stdin_info stdout_info stderr_info" )
53
+ _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
54
+ _EncodingDetails = namedtuple ( "EncodingDetails" , _fields )
45
55
46
56
class EncodingDetails (_EncodingDetails ):
57
+ # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
47
58
CHILD_PROCESS_SCRIPT = ";" .join ([
48
- "import sys" ,
59
+ "import sys, os " ,
49
60
"print(sys.getfilesystemencoding())" ,
50
61
"print(sys.stdin.encoding + ':' + sys.stdin.errors)" ,
51
62
"print(sys.stdout.encoding + ':' + sys.stdout.errors)" ,
52
63
"print(sys.stderr.encoding + ':' + sys.stderr.errors)" ,
64
+ "print(os.environ.get('LANG', 'not set'))" ,
65
+ "print(os.environ.get('LC_CTYPE', 'not set'))" ,
66
+ "print(os.environ.get('LC_ALL', 'not set'))" ,
53
67
])
54
68
55
69
@classmethod
56
- def get_expected_details (cls , fs_encoding , stream_encoding ):
70
+ def get_expected_details (cls , coercion_expected , fs_encoding , stream_encoding , env_vars ):
57
71
"""Returns expected child process details for a given encoding"""
58
72
_stream = stream_encoding + ":{}"
59
73
# stdin and stdout should use surrogateescape either because the
60
74
# coercion triggered, or because the C locale was detected
61
75
stream_info = 2 * [_stream .format ("surrogateescape" )]
62
76
# stderr should always use backslashreplace
63
77
stream_info .append (_stream .format ("backslashreplace" ))
64
- return dict (cls (fs_encoding , * stream_info )._asdict ())
78
+ expected_lang = env_vars .get ("LANG" , "not set" ).lower ()
79
+ if coercion_expected :
80
+ expected_lc_ctype = CLI_COERCION_TARGET .lower ()
81
+ else :
82
+ expected_lc_ctype = env_vars .get ("LC_CTYPE" , "not set" ).lower ()
83
+ expected_lc_all = env_vars .get ("LC_ALL" , "not set" ).lower ()
84
+ env_info = expected_lang , expected_lc_ctype , expected_lc_all
85
+ return dict (cls (fs_encoding , * stream_info , * env_info )._asdict ())
65
86
66
87
@staticmethod
67
88
def _handle_output_variations (data ):
@@ -97,64 +118,20 @@ def get_child_details(cls, env_vars):
97
118
result .fail (py_cmd )
98
119
# All subprocess outputs in this test case should be pure ASCII
99
120
adjusted_output = cls ._handle_output_variations (result .out )
100
- stdout_lines = adjusted_output .decode ("ascii" ).rstrip (). splitlines ()
121
+ stdout_lines = adjusted_output .decode ("ascii" ).splitlines ()
101
122
child_encoding_details = dict (cls (* stdout_lines )._asdict ())
102
123
stderr_lines = result .err .decode ("ascii" ).rstrip ().splitlines ()
103
124
return child_encoding_details , stderr_lines
104
125
105
126
106
- class _ChildProcessEncodingTestCase (unittest .TestCase ):
107
- # Base class to check for expected encoding details in a child process
108
-
109
- def _check_child_encoding_details (self ,
110
- env_vars ,
111
- expected_fs_encoding ,
112
- expected_stream_encoding ,
113
- expected_warning ):
114
- """Check the C locale handling for the given process environment
115
-
116
- Parameters:
117
- expected_fs_encoding: expected sys.getfilesystemencoding() result
118
- expected_stream_encoding: expected encoding for standard streams
119
- expected_warning: stderr output to expect (if any)
120
- """
121
- result = EncodingDetails .get_child_details (env_vars )
122
- encoding_details , stderr_lines = result
123
- self .assertEqual (encoding_details ,
124
- EncodingDetails .get_expected_details (
125
- expected_fs_encoding ,
126
- expected_stream_encoding ))
127
- self .assertEqual (stderr_lines , expected_warning )
128
-
129
127
# Details of the shared library warning emitted at runtime
130
- LIBRARY_C_LOCALE_WARNING = (
128
+ LEGACY_LOCALE_WARNING = (
131
129
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
132
130
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
133
131
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
134
132
"locales is recommended."
135
133
)
136
134
137
- @unittest .skipUnless (sysconfig .get_config_var ("PY_WARN_ON_C_LOCALE" ),
138
- "C locale runtime warning disabled at build time" )
139
- class LocaleWarningTests (_ChildProcessEncodingTestCase ):
140
- # Test warning emitted when running in the C locale
141
-
142
- def test_library_c_locale_warning (self ):
143
- self .maxDiff = None
144
- for locale_to_set in ("C" , "POSIX" , "invalid.ascii" ):
145
- # XXX (ncoghlan): Mac OS X doesn't behave as expected in the
146
- # POSIX locale, so we skip that for now
147
- if sys .platform == "darwin" and locale_to_set == "POSIX" :
148
- continue
149
- var_dict = {
150
- "LC_ALL" : locale_to_set
151
- }
152
- with self .subTest (forced_locale = locale_to_set ):
153
- self ._check_child_encoding_details (var_dict ,
154
- C_LOCALE_FS_ENCODING ,
155
- C_LOCALE_STREAM_ENCODING ,
156
- [LIBRARY_C_LOCALE_WARNING ])
157
-
158
135
# Details of the CLI locale coercion warning emitted at runtime
159
136
CLI_COERCION_WARNING_FMT = (
160
137
"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
@@ -163,9 +140,13 @@ def test_library_c_locale_warning(self):
163
140
164
141
165
142
AVAILABLE_TARGETS = None
143
+ CLI_COERCION_TARGET = None
144
+ CLI_COERCION_WARNING = None
166
145
167
146
def setUpModule ():
168
147
global AVAILABLE_TARGETS
148
+ global CLI_COERCION_TARGET
149
+ global CLI_COERCION_WARNING
169
150
170
151
if AVAILABLE_TARGETS is not None :
171
152
# initialization already done
@@ -177,26 +158,57 @@ def setUpModule():
177
158
if _set_locale_in_subprocess (target_locale ):
178
159
AVAILABLE_TARGETS .append (target_locale )
179
160
161
+ if AVAILABLE_TARGETS :
162
+ # Coercion is expected to use the first available target locale
163
+ CLI_COERCION_TARGET = AVAILABLE_TARGETS [0 ]
164
+ CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT .format (CLI_COERCION_TARGET )
180
165
181
166
182
- class _LocaleCoercionTargetsTestCase ( _ChildProcessEncodingTestCase ):
183
- # Base class for test cases that rely on coercion targets being defined
167
+ class _LocaleHandlingTestCase ( unittest . TestCase ):
168
+ # Base class to check expected locale handling behaviour
184
169
185
- @classmethod
186
- def setUpClass (cls ):
187
- if not AVAILABLE_TARGETS :
188
- raise unittest .SkipTest ("No C-with-UTF-8 locale available" )
170
def _check_child_encoding_details(self,
                                  env_vars,
                                  expected_fs_encoding,
                                  expected_stream_encoding,
                                  expected_warnings,
                                  coercion_expected):
    """Check the C locale handling for the given process environment

    Parameters:
        env_vars: environment variables passed to the child process and
            used to derive the expected LANG/LC_CTYPE/LC_ALL report
        expected_fs_encoding: expected sys.getfilesystemencoding() result
        expected_stream_encoding: expected encoding for standard streams
        expected_warnings: list of stderr lines to expect; None is treated
            as "no stderr output expected"
        coercion_expected: forwarded to EncodingDetails.get_expected_details
            to select the expected LC_CTYPE value
    """
    result = EncodingDetails.get_child_details(env_vars)
    encoding_details, stderr_lines = result
    expected_details = EncodingDetails.get_expected_details(
        coercion_expected,
        expected_fs_encoding,
        expected_stream_encoding,
        env_vars
    )
    self.assertEqual(encoding_details, expected_details)
    # Normalise None to an empty list so that callers can simply omit the
    # warnings when the child is expected to be silent on stderr
    if expected_warnings is None:
        expected_warnings = []
    self.assertEqual(stderr_lines, expected_warnings)
190
195
191
- class LocaleConfigurationTests (_LocaleCoercionTargetsTestCase ):
196
+
197
+ class LocaleConfigurationTests (_LocaleHandlingTestCase ):
192
198
# Test explicit external configuration via the process environment
193
199
200
@classmethod
def setUpClass(cls):
    """Skip the whole test class when no C-with-UTF-8 locale is available."""
    # This relies on setUpModule() having been run, so it can't be
    # handled via the @unittest.skipUnless decorator
    if not AVAILABLE_TARGETS:
        raise unittest.SkipTest("No C-with-UTF-8 locale available")
205
+
194
206
def test_external_target_locale_configuration (self ):
207
+
195
208
# Explicitly setting a target locale should give the same behaviour as
196
209
# is seen when implicitly coercing to that target locale
197
210
self .maxDiff = None
198
211
199
- expected_warning = []
200
212
expected_fs_encoding = "utf-8"
201
213
expected_stream_encoding = "utf-8"
202
214
@@ -209,6 +221,7 @@ def test_external_target_locale_configuration(self):
209
221
for locale_to_set in AVAILABLE_TARGETS :
210
222
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
211
223
# expected, so skip that combination for now
224
+ # See https://bugs.python.org/issue30672 for discussion
212
225
if env_var == "LANG" and locale_to_set == "UTF-8" :
213
226
continue
214
227
@@ -219,17 +232,23 @@ def test_external_target_locale_configuration(self):
219
232
self ._check_child_encoding_details (var_dict ,
220
233
expected_fs_encoding ,
221
234
expected_stream_encoding ,
222
- expected_warning )
235
+ expected_warnings = None ,
236
+ coercion_expected = False )
223
237
224
238
225
239
226
240
@test .support .cpython_only
227
241
@unittest .skipUnless (sysconfig .get_config_var ("PY_COERCE_C_LOCALE" ),
228
242
"C locale coercion disabled at build time" )
229
- class LocaleCoercionTests (_LocaleCoercionTargetsTestCase ):
243
+ class LocaleCoercionTests (_LocaleHandlingTestCase ):
230
244
# Test implicit reconfiguration of the environment during CLI startup
231
245
232
- def _check_c_locale_coercion (self , fs_encoding , stream_encoding , coerce_c_locale ):
246
+ def _check_c_locale_coercion (self ,
247
+ fs_encoding , stream_encoding ,
248
+ coerce_c_locale ,
249
+ expected_warnings = None ,
250
+ coercion_expected = True ,
251
+ ** extra_vars ):
233
252
"""Check the C locale handling for various configurations
234
253
235
254
Parameters:
@@ -238,27 +257,31 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
238
257
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
239
258
None: don't set the variable at all
240
259
str: the value set in the child's environment
260
+ expected_warnings: expected warning lines on stderr
261
+ extra_vars: additional environment variables to set in subprocess
241
262
"""
242
-
243
- # Check for expected warning on stderr if C locale is coerced
244
263
self .maxDiff = None
245
264
246
- expected_warning = []
247
- if coerce_c_locale != "0" :
248
- # Expect coercion to use the first available locale
249
- warning_msg = CLI_COERCION_WARNING_FMT .format (AVAILABLE_TARGETS [0 ])
250
- expected_warning .append (warning_msg )
265
+ if not AVAILABLE_TARGETS :
266
+ # Locale coercion is disabled when there aren't any target locales
267
+ fs_encoding = C_LOCALE_FS_ENCODING
268
+ stream_encoding = C_LOCALE_STREAM_ENCODING
269
+ coercion_expected = False
270
+ if expected_warnings :
271
+ expected_warnings = [LEGACY_LOCALE_WARNING ]
251
272
252
273
base_var_dict = {
253
274
"LANG" : "" ,
254
275
"LC_CTYPE" : "" ,
255
276
"LC_ALL" : "" ,
256
277
}
278
+ base_var_dict .update (extra_vars )
257
279
for env_var in ("LANG" , "LC_CTYPE" ):
258
280
for locale_to_set in ("" , "C" , "POSIX" , "invalid.ascii" ):
259
- # XXX (ncoghlan): Mac OS X doesn't behave as expected in the
281
+ # XXX (ncoghlan): *BSD platforms don't behave as expected in the
260
282
# POSIX locale, so we skip that for now
261
- if sys .platform == "darwin" and locale_to_set == "POSIX" :
283
+ # See https://bugs.python.org/issue30672 for discussion
284
+ if locale_to_set == "POSIX" :
262
285
continue
263
286
with self .subTest (env_var = env_var ,
264
287
nominal_locale = locale_to_set ,
@@ -267,33 +290,62 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
267
290
var_dict [env_var ] = locale_to_set
268
291
if coerce_c_locale is not None :
269
292
var_dict ["PYTHONCOERCECLOCALE" ] = coerce_c_locale
293
+ # Check behaviour on successful coercion
270
294
self ._check_child_encoding_details (var_dict ,
271
295
fs_encoding ,
272
296
stream_encoding ,
273
- expected_warning )
297
+ expected_warnings ,
298
+ coercion_expected )
274
299
275
300
def test_test_PYTHONCOERCECLOCALE_not_set (self ):
276
301
# This should coerce to the first available target locale by default
277
302
self ._check_c_locale_coercion ("utf-8" , "utf-8" , coerce_c_locale = None )
278
303
279
304
def test_PYTHONCOERCECLOCALE_not_zero (self ):
280
- # *Any* string other that "0" is considered "set" for our purposes
305
+ # *Any* string other than "0" is considered "set" for our purposes
281
306
# and hence should result in the locale coercion being enabled
282
307
for setting in ("" , "1" , "true" , "false" ):
283
308
self ._check_c_locale_coercion ("utf-8" , "utf-8" , coerce_c_locale = setting )
284
309
310
+ def test_PYTHONCOERCECLOCALE_set_to_warn (self ):
311
+ # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
312
+ self ._check_c_locale_coercion ("utf-8" , "utf-8" ,
313
+ coerce_c_locale = "warn" ,
314
+ expected_warnings = [CLI_COERCION_WARNING ])
315
+
316
+
285
317
def test_PYTHONCOERCECLOCALE_set_to_zero (self ):
286
318
# The setting "0" should result in the locale coercion being disabled
287
319
self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
288
320
C_LOCALE_STREAM_ENCODING ,
289
- coerce_c_locale = "0" )
321
+ coerce_c_locale = "0" ,
322
+ coercion_expected = False )
323
+ # Setting LC_ALL=C shouldn't make any difference to the behaviour
324
+ self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
325
+ C_LOCALE_STREAM_ENCODING ,
326
+ coerce_c_locale = "0" ,
327
+ LC_ALL = "C" ,
328
+ coercion_expected = False )
290
329
330
+ def test_LC_ALL_set_to_C (self ):
331
+ # Setting LC_ALL should render the locale coercion ineffective
332
+ self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
333
+ C_LOCALE_STREAM_ENCODING ,
334
+ coerce_c_locale = None ,
335
+ LC_ALL = "C" ,
336
+ coercion_expected = False )
337
+ # And result in a warning about a lack of locale compatibility
338
+ self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
339
+ C_LOCALE_STREAM_ENCODING ,
340
+ coerce_c_locale = "warn" ,
341
+ LC_ALL = "C" ,
342
+ expected_warnings = [LEGACY_LOCALE_WARNING ],
343
+ coercion_expected = False )
291
344
292
345
def test_main ():
293
346
test .support .run_unittest (
294
347
LocaleConfigurationTests ,
295
- LocaleCoercionTests ,
296
- LocaleWarningTests
348
+ LocaleCoercionTests
297
349
)
298
350
test .support .reap_children ()
299
351
0 commit comments