From da2ffe2721274e76b9a0570e0da311a0c0abfb51 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 17 Jun 2017 16:40:10 +1000 Subject: [PATCH 1/9] bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag - legacy locale warnings are now silent by default - build time configuration setting is removed - set PYTHONCOERCECLOCALE=warn to enable them - updates test cases accordingly --- Lib/test/test_c_locale_coercion.py | 170 +++++++++++++++++------------ Python/pylifecycle.c | 56 ++++++---- 2 files changed, 135 insertions(+), 91 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index aa0771c798d8a2..5ca3cc5e1c3c2c 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -22,9 +22,13 @@ else: C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING -# XXX (ncoghlan): The above is probably still wrong for: +# Note that the above is probably still wrong in some cases, such as: # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set # * AIX and any other platforms that use latin-1 in the C locale +# +# Options for dealing with this: +# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. 
Windows doesn't) +# * Fix the test expectations to match the actual platform behaviour # In order to get the warning messages to match up as expected, the candidate # order here must much the target locale order in Python/pylifecycle.c @@ -40,20 +44,24 @@ def _set_locale_in_subprocess(locale_name): result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) return result.rc == 0 -_EncodingDetails = namedtuple("EncodingDetails", - "fsencoding stdin_info stdout_info stderr_info") +_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" +_EncodingDetails = namedtuple("EncodingDetails", _fields) class EncodingDetails(_EncodingDetails): + # XXX (ncoghlan): Using JSON for child state reporting may be less fragile CHILD_PROCESS_SCRIPT = ";".join([ - "import sys", + "import sys, os", "print(sys.getfilesystemencoding())", "print(sys.stdin.encoding + ':' + sys.stdin.errors)", "print(sys.stdout.encoding + ':' + sys.stdout.errors)", "print(sys.stderr.encoding + ':' + sys.stderr.errors)", + "print(os.environ.get('LANG', 'not set'))", + "print(os.environ.get('LC_CTYPE', 'not set'))", + "print(os.environ.get('LC_ALL', 'not set'))", ]) @classmethod - def get_expected_details(cls, fs_encoding, stream_encoding): + def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars): """Returns expected child process details for a given encoding""" _stream = stream_encoding + ":{}" # stdin and stdout should use surrogateescape either because the @@ -61,7 +69,14 @@ def get_expected_details(cls, fs_encoding, stream_encoding): stream_info = 2*[_stream.format("surrogateescape")] # stderr should always use backslashreplace stream_info.append(_stream.format("backslashreplace")) - return dict(cls(fs_encoding, *stream_info)._asdict()) + expected_lang = env_vars.get("LANG", "not set").lower() + if coercion_expected: + expected_lc_ctype = CLI_COERCION_TARGET.lower() + else: + expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower() + 
expected_lc_all = env_vars.get("LC_ALL", "not set").lower() + env_info = expected_lang, expected_lc_ctype, expected_lc_all + return dict(cls(fs_encoding, *stream_info, *env_info)._asdict()) @staticmethod def _handle_output_variations(data): @@ -97,64 +112,20 @@ def get_child_details(cls, env_vars): result.fail(py_cmd) # All subprocess outputs in this test case should be pure ASCII adjusted_output = cls._handle_output_variations(result.out) - stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines() + stdout_lines = adjusted_output.decode("ascii").splitlines() child_encoding_details = dict(cls(*stdout_lines)._asdict()) stderr_lines = result.err.decode("ascii").rstrip().splitlines() return child_encoding_details, stderr_lines -class _ChildProcessEncodingTestCase(unittest.TestCase): - # Base class to check for expected encoding details in a child process - - def _check_child_encoding_details(self, - env_vars, - expected_fs_encoding, - expected_stream_encoding, - expected_warning): - """Check the C locale handling for the given process environment - - Parameters: - expected_fs_encoding: expected sys.getfilesystemencoding() result - expected_stream_encoding: expected encoding for standard streams - expected_warning: stderr output to expect (if any) - """ - result = EncodingDetails.get_child_details(env_vars) - encoding_details, stderr_lines = result - self.assertEqual(encoding_details, - EncodingDetails.get_expected_details( - expected_fs_encoding, - expected_stream_encoding)) - self.assertEqual(stderr_lines, expected_warning) - # Details of the shared library warning emitted at runtime -LIBRARY_C_LOCALE_WARNING = ( +LEGACY_LOCALE_WARNING = ( "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " "locales is recommended." 
) -@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"), - "C locale runtime warning disabled at build time") -class LocaleWarningTests(_ChildProcessEncodingTestCase): - # Test warning emitted when running in the C locale - - def test_library_c_locale_warning(self): - self.maxDiff = None - for locale_to_set in ("C", "POSIX", "invalid.ascii"): - # XXX (ncoghlan): Mac OS X doesn't behave as expected in the - # POSIX locale, so we skip that for now - if sys.platform == "darwin" and locale_to_set == "POSIX": - continue - var_dict = { - "LC_ALL": locale_to_set - } - with self.subTest(forced_locale=locale_to_set): - self._check_child_encoding_details(var_dict, - C_LOCALE_FS_ENCODING, - C_LOCALE_STREAM_ENCODING, - [LIBRARY_C_LOCALE_WARNING]) - # Details of the CLI locale coercion warning emitted at runtime CLI_COERCION_WARNING_FMT = ( "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale " @@ -163,9 +134,13 @@ def test_library_c_locale_warning(self): AVAILABLE_TARGETS = None +CLI_COERCION_TARGET = None +CLI_COERCION_WARNING = None def setUpModule(): global AVAILABLE_TARGETS + global CLI_COERCION_TARGET + global CLI_COERCION_WARNING if AVAILABLE_TARGETS is not None: # initialization already done @@ -177,9 +152,12 @@ def setUpModule(): if _set_locale_in_subprocess(target_locale): AVAILABLE_TARGETS.append(target_locale) + # Coercion is expected to use the first available target locale + CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] + CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) -class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase): +class _LocaleCoercionTargetsTestCase(unittest.TestCase): # Base class for test cases that rely on coercion targets being defined @classmethod @@ -187,6 +165,32 @@ def setUpClass(cls): if not AVAILABLE_TARGETS: raise unittest.SkipTest("No C-with-UTF-8 locale available") + def _check_child_encoding_details(self, + env_vars, + expected_fs_encoding, + 
expected_stream_encoding, + expected_warnings, + coercion_expected): + """Check the C locale handling for the given process environment + + Parameters: + expected_fs_encoding: expected sys.getfilesystemencoding() result + expected_stream_encoding: expected encoding for standard streams + expected_warning: stderr output to expect (if any) + """ + result = EncodingDetails.get_child_details(env_vars) + encoding_details, stderr_lines = result + expected_details = EncodingDetails.get_expected_details( + coercion_expected, + expected_fs_encoding, + expected_stream_encoding, + env_vars + ) + self.assertEqual(encoding_details, expected_details) + if expected_warnings is None: + expected_warnings = [] + self.assertEqual(stderr_lines, expected_warnings) + class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase): # Test explicit external configuration via the process environment @@ -196,7 +200,6 @@ def test_external_target_locale_configuration(self): # is seen when implicitly coercing to that target locale self.maxDiff = None - expected_warning = [] expected_fs_encoding = "utf-8" expected_stream_encoding = "utf-8" @@ -219,7 +222,8 @@ def test_external_target_locale_configuration(self): self._check_child_encoding_details(var_dict, expected_fs_encoding, expected_stream_encoding, - expected_warning) + expected_warnings=None, + coercion_expected=False) @@ -229,7 +233,12 @@ def test_external_target_locale_configuration(self): class LocaleCoercionTests(_LocaleCoercionTargetsTestCase): # Test implicit reconfiguration of the environment during CLI startup - def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale): + def _check_c_locale_coercion(self, + fs_encoding, stream_encoding, + coerce_c_locale, + expected_warnings=None, + coercion_expected=True, + **extra_vars): """Check the C locale handling for various configurations Parameters: @@ -238,22 +247,19 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale 
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE None: don't set the variable at all str: the value set in the child's environment + expected_warnings: expected warning lines on stderr + extra_vars: additional environment variables to set in subprocess """ # Check for expected warning on stderr if C locale is coerced self.maxDiff = None - expected_warning = [] - if coerce_c_locale != "0": - # Expect coercion to use the first available locale - warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0]) - expected_warning.append(warning_msg) - base_var_dict = { "LANG": "", "LC_CTYPE": "", "LC_ALL": "", } + base_var_dict.update(extra_vars) for env_var in ("LANG", "LC_CTYPE"): for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): # XXX (ncoghlan): Mac OS X doesn't behave as expected in the @@ -267,33 +273,63 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale var_dict[env_var] = locale_to_set if coerce_c_locale is not None: var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale + # Check behaviour on successful coercion self._check_child_encoding_details(var_dict, fs_encoding, stream_encoding, - expected_warning) + expected_warnings, + coercion_expected) def test_test_PYTHONCOERCECLOCALE_not_set(self): # This should coerce to the first available target locale by default self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None) def test_PYTHONCOERCECLOCALE_not_zero(self): - # *Any* string other that "0" is considered "set" for our purposes + # *Any* string other than "0" is considered "set" for our purposes # and hence should result in the locale coercion being enabled for setting in ("", "1", "true", "false"): self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting) + def test_PYTHONCOERCECLOCALE_set_to_warn(self): + # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales + self._check_c_locale_coercion("utf-8", "utf-8", + coerce_c_locale="warn", + 
expected_warnings=[CLI_COERCION_WARNING]) + + def test_PYTHONCOERCECLOCALE_set_to_zero(self): # The setting "0" should result in the locale coercion being disabled self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, C_LOCALE_STREAM_ENCODING, - coerce_c_locale="0") + coerce_c_locale="0", + coercion_expected=False) + # Setting LC_ALL=C shouldn't make any difference to the behaviour + self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, + C_LOCALE_STREAM_ENCODING, + coerce_c_locale="0", + LC_ALL="C", + coercion_expected=False) + def test_LC_ALL_set_to_C(self): + # Setting LC_ALL should render the locale coercion ineffective + self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, + C_LOCALE_STREAM_ENCODING, + coerce_c_locale=None, + LC_ALL="C", + coercion_expected=False) + # And result in a second warning indicating locale coercion didn't work + ineffective_coercion_warning = [LEGACY_LOCALE_WARNING] + self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, + C_LOCALE_STREAM_ENCODING, + coerce_c_locale="warn", + LC_ALL="C", + expected_warnings=ineffective_coercion_warning, + coercion_expected=False) def test_main(): test.support.run_unittest( LocaleConfigurationTests, - LocaleCoercionTests, - LocaleWarningTests + LocaleCoercionTests ) test.support.reap_children() diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b7c98225641176..aa49892e31b0f2 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void) { #ifndef MS_WINDOWS /* On non-Windows systems, the C locale is considered a legacy locale */ + /* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat + * the POSIX locale as a simple alias for the C locale, so + * we may also want to check for that explicitly. 
+ */ const char *ctype_loc = setlocale(LC_CTYPE, NULL); return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; #else @@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void) #endif } +static const char *_C_LOCALE_WARNING = + "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " + "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " + "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " + "locales is recommended.\n"; + +static int +_legacy_locale_warnings_enabled(void) +{ + const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); + return (coerce_c_locale != NULL && + strncmp(coerce_c_locale, "warn", 5) == 0); +} + +static void +_emit_stderr_warning_for_legacy_locale(void) +{ + if (_legacy_locale_warnings_enabled()) { + if (_Py_LegacyLocaleDetected()) { + fprintf(stderr, "%s", _C_LOCALE_WARNING); + } + } +} + typedef struct _CandidateLocale { const char *locale_name; /* The locale to try as a coercion target */ } _LocaleCoercionTarget; @@ -419,7 +447,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) "Error setting LC_CTYPE, skipping C locale coercion\n"); return; } - fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); + if (_legacy_locale_warnings_enabled()) { + fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); + } /* Reconfigure with the overridden environment variables */ setlocale(LC_ALL, ""); @@ -465,26 +495,6 @@ _Py_CoerceLegacyLocale(void) } -#ifdef PY_WARN_ON_C_LOCALE -static const char *_C_LOCALE_WARNING = - "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " - "encoding), which may cause Unicode compatibility problems. 
Using C.UTF-8, " - "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " - "locales is recommended.\n"; - -static void -_emit_stderr_warning_for_c_locale(void) -{ - const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); - if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { - if (_Py_LegacyLocaleDetected()) { - fprintf(stderr, "%s", _C_LOCALE_WARNING); - } - } -} -#endif - - /* Global initializations. Can be undone by Py_Finalize(). Don't call this twice without an intervening Py_Finalize() call. @@ -561,9 +571,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) the locale's charset without having to switch locales. */ setlocale(LC_CTYPE, ""); -#ifdef PY_WARN_ON_C_LOCALE - _emit_stderr_warning_for_c_locale(); -#endif + _emit_stderr_warning_for_legacy_locale(); #endif #endif From 093c8da161c7f57594c0044e64c64c29bd16d6e1 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 17 Jun 2017 23:26:36 +1000 Subject: [PATCH 2/9] Handle lack of coercion targets in tests --- Lib/test/test_c_locale_coercion.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 5ca3cc5e1c3c2c..4a7cd721a1eb90 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -152,9 +152,10 @@ def setUpModule(): if _set_locale_in_subprocess(target_locale): AVAILABLE_TARGETS.append(target_locale) - # Coercion is expected to use the first available target locale - CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] - CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) + if AVAILABLE_TARGETS: + # Coercion is expected to use the first available target locale + CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] + CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) class _LocaleCoercionTargetsTestCase(unittest.TestCase): From 58e925180fdd96957f990476d91f32ab90a122e3 Mon Sep 17 00:00:00 2001 From: Nick 
Coghlan Date: Sat, 17 Jun 2017 23:57:30 +1000 Subject: [PATCH 3/9] Check expected behaviour without coercion target locales --- Lib/test/test_c_locale_coercion.py | 36 ++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 4a7cd721a1eb90..26496c7b825c29 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -158,13 +158,8 @@ def setUpModule(): CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) -class _LocaleCoercionTargetsTestCase(unittest.TestCase): - # Base class for test cases that rely on coercion targets being defined - - @classmethod - def setUpClass(cls): - if not AVAILABLE_TARGETS: - raise unittest.SkipTest("No C-with-UTF-8 locale available") +class _LocaleHandlingTestCase(unittest.TestCase): + # Base class to check expected locale handling behaviour def _check_child_encoding_details(self, env_vars, @@ -193,10 +188,17 @@ def _check_child_encoding_details(self, self.assertEqual(stderr_lines, expected_warnings) -class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase): +class LocaleConfigurationTests(_LocaleHandlingTestCase): # Test explicit external configuration via the process environment + def setUpClass(): + # This relies on setupModule() having been run, so it can't be + # handled via the @unittest.skipUnless decorator + if not AVAILABLE_TARGETS: + raise unittest.SkipTest("No C-with-UTF-8 locale available") + def test_external_target_locale_configuration(self): + # Explicitly setting a target locale should give the same behaviour as # is seen when implicitly coercing to that target locale self.maxDiff = None @@ -213,6 +215,7 @@ def test_external_target_locale_configuration(self): for locale_to_set in AVAILABLE_TARGETS: # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as # expected, so skip that combination for now + # See https://bugs.python.org/issue30672 for 
discussion if env_var == "LANG" and locale_to_set == "UTF-8": continue @@ -231,7 +234,7 @@ def test_external_target_locale_configuration(self): @test.support.cpython_only @unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"), "C locale coercion disabled at build time") -class LocaleCoercionTests(_LocaleCoercionTargetsTestCase): +class LocaleCoercionTests(_LocaleHandlingTestCase): # Test implicit reconfiguration of the environment during CLI startup def _check_c_locale_coercion(self, @@ -251,10 +254,15 @@ def _check_c_locale_coercion(self, expected_warnings: expected warning lines on stderr extra_vars: additional environment variables to set in subprocess """ - - # Check for expected warning on stderr if C locale is coerced self.maxDiff = None + if AVAILABLE_TARGETS is None: + # Locale coercion is disabled when there aren't any target locales + fs_encoding = C_LOCALE_FS_ENCODING + stream_encoding = C_LOCALE_STREAM_ENCODING + expected_warnings = [] + coercion_expected = False + base_var_dict = { "LANG": "", "LC_CTYPE": "", @@ -265,6 +273,7 @@ def _check_c_locale_coercion(self, for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): # XXX (ncoghlan): Mac OS X doesn't behave as expected in the # POSIX locale, so we skip that for now + # See https://bugs.python.org/issue30672 for discussion if sys.platform == "darwin" and locale_to_set == "POSIX": continue with self.subTest(env_var=env_var, @@ -318,13 +327,12 @@ def test_LC_ALL_set_to_C(self): coerce_c_locale=None, LC_ALL="C", coercion_expected=False) - # And result in a second warning indicating locale coercion didn't work - ineffective_coercion_warning = [LEGACY_LOCALE_WARNING] + # And result in a warning about a lack of locale compatibility self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, C_LOCALE_STREAM_ENCODING, coerce_c_locale="warn", LC_ALL="C", - expected_warnings=ineffective_coercion_warning, + expected_warnings=[LEGACY_LOCALE_WARNING], coercion_expected=False) def test_main(): From 
733f72f999615ad99fafc85d46d652e7a9cc1ee0 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 18 Jun 2017 00:57:54 +1000 Subject: [PATCH 4/9] Correctly check for target availability --- Lib/test/test_c_locale_coercion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 26496c7b825c29..56f42b791098f8 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -256,7 +256,7 @@ def _check_c_locale_coercion(self, """ self.maxDiff = None - if AVAILABLE_TARGETS is None: + if not AVAILABLE_TARGETS: # Locale coercion is disabled when there aren't any target locales fs_encoding = C_LOCALE_FS_ENCODING stream_encoding = C_LOCALE_STREAM_ENCODING From c0d8b8e0e18d9e793e965521d9784a1d21cee08f Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 18 Jun 2017 10:41:48 +1000 Subject: [PATCH 5/9] Expect legacy locale warning in tests --- Lib/test/test_c_locale_coercion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 56f42b791098f8..4fb0152b241344 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -260,8 +260,9 @@ def _check_c_locale_coercion(self, # Locale coercion is disabled when there aren't any target locales fs_encoding = C_LOCALE_FS_ENCODING stream_encoding = C_LOCALE_STREAM_ENCODING - expected_warnings = [] coercion_expected = False + if expected_warnings: + expected_warnings = [LEGACY_LOCALE_WARNING] base_var_dict = { "LANG": "", From 69fa58b005d851fb0f3a66123c616977d87efc4f Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 18 Jun 2017 11:18:22 +1000 Subject: [PATCH 6/9] Skip testing the POSIX locale for now --- Lib/test/test_c_locale_coercion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 
4fb0152b241344..1f6054b347b3da 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -272,10 +272,10 @@ def _check_c_locale_coercion(self, base_var_dict.update(extra_vars) for env_var in ("LANG", "LC_CTYPE"): for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): - # XXX (ncoghlan): Mac OS X doesn't behave as expected in the + # XXX (ncoghlan): *BSD platforms don't behave as expected in the # POSIX locale, so we skip that for now # See https://bugs.python.org/issue30672 for discussion - if sys.platform == "darwin" and locale_to_set == "POSIX": + if locale_to_set == "POSIX": continue with self.subTest(env_var=env_var, nominal_locale=locale_to_set, From d5d5e36e3e7ce363f7d29a5ca7d1d4bd0b13b917 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 18 Jun 2017 11:31:34 +1000 Subject: [PATCH 7/9] Disable UTF-8 as a coercion target locale --- Lib/test/test_c_locale_coercion.py | 8 +++++++- Python/pylifecycle.c | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 1f6054b347b3da..a4b4626756adce 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -32,7 +32,13 @@ # In order to get the warning messages to match up as expected, the candidate # order here must much the target locale order in Python/pylifecycle.c -_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") +_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") + +# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to +# problems encountered on *BSD systems with those test cases +# For additional details see: +# nl_langinfo CODESET error: https://bugs.python.org/issue30647 +# locale handling differences: https://bugs.python.org/issue30672 # There's no reliable cross-platform way of checking locale alias # lists, so the only way of knowing which of these locales will work diff --git a/Python/pylifecycle.c 
b/Python/pylifecycle.c index aa49892e31b0f2..953bc90a456bdb 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -399,10 +399,17 @@ typedef struct _CandidateLocale { static _LocaleCoercionTarget _TARGET_LOCALES[] = { {"C.UTF-8"}, {"C.utf8"}, - {"UTF-8"}, + /* {"UTF-8"}, */ {NULL} }; +/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to + * problems encountered on *BSD systems with those test cases + * For additional details see: + * nl_langinfo CODESET error: https://bugs.python.org/issue30647 + * locale handling differences: https://bugs.python.org/issue30672 + */ + static char * get_default_standard_stream_error_handler(void) { From e56e89d9ba939ce5462daaac1feafad19cf410f6 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 18 Jun 2017 11:40:46 +1000 Subject: [PATCH 8/9] Document PYTHONCOERCECLOCALE=warn --- Doc/using/cmdline.rst | 5 +++++ Doc/whatsnew/3.7.rst | 19 +++++-------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 920d5c01e4bef4..5adad159e65cc1 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -744,6 +744,11 @@ conflict. :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual. + For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause + Python to emit warning messages on ``stderr`` if either the locale coercion + activates, or else if a locale that *would* have triggered coercion is + still active when the Python runtime is initialized. + Availability: \*nix .. versionadded:: 3.7 diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index db1195461728b0..5f683eb586a81c 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and ``UTF-8``). 
The default error handler for ``stderr`` continues to be ``backslashreplace``, regardless of locale. -.. note:: - - In the current implementation, a warning message is printed directly to - ``stderr`` even for successful implicit locale coercion. This gives - redistributors and system integrators the opportunity to determine if they - should be making an environmental change to avoid the need for implicit - coercion at the Python interpreter level. - - However, it's not clear that this is going to be the best approach for - the final 3.7.0 release, and we may end up deciding to disable the warning - by default and provide some way of opting into it at runtime or build time. - - Concrete examples of use cases where it would be preferrable to disable the - warning by default can be noted on :issue:`30565`. +Locale coercion is silent by default, but to assist in debugging potentially +locale related integration problems, explicit warnings (emitted directly on +``stderr``) can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This +setting will also cause the Python runtime to emit a warning if the legacy C +locale remains active when the core interpreter is initialized. .. seealso:: From 919f8791ff6410a03a8bbf318985df40adb433a5 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 18 Jun 2017 11:47:47 +1000 Subject: [PATCH 9/9] Update --help for PYTHONCOERCECLOCALE=warn --- Modules/main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Modules/main.c b/Modules/main.c index 94400fedd42516..08b22760de1125 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -105,10 +105,10 @@ static const char usage_6[] = " predictable seed.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators.
Use PYTHONMALLOC=debug to install debug\n" -" hooks.\n"; -static const char usage_7[] = +" hooks.\n" "PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n" -" coercion behavior\n"; +" coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n" +" locale coercion and locale compatibility warnings on stderr.\n"; static int usage(int exitcode, const wchar_t* program) @@ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program) fprintf(f, usage_4, (wint_t)DELIM); fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP); fputs(usage_6, f); - fputs(usage_7, f); } return exitcode; }