Skip to content

Commit 6c3fbbc

Browse files
bpo-13153: Use OS native encoding for converting between Python and Tcl. (GH-16545)
On Windows use UTF-16 (or UTF-32 for 32-bit Tcl_UniChar) with the "surrogatepass" error handler for converting to/from Tcl Unicode objects. On Linux use UTF-8 with the "surrogateescape" error handler for converting to/from Tcl String objects. Converting strings from Tcl to Python and back now never fails (except MemoryError). (cherry picked from commit 06cb94b) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 4f82a53 commit 6c3fbbc

File tree

9 files changed

+240
-241
lines changed

9 files changed

+240
-241
lines changed

Lib/idlelib/editor.py

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -358,21 +358,6 @@ def set_width(self):
358358
Font(text, font=text.cget('font')).measure('0')
359359
self.width = pixel_width // zero_char_width
360360

361-
def _filename_to_unicode(self, filename):
362-
"""Return filename as BMP unicode so displayable in Tk."""
363-
# Decode bytes to unicode.
364-
if isinstance(filename, bytes):
365-
try:
366-
filename = filename.decode(self.filesystemencoding)
367-
except UnicodeDecodeError:
368-
try:
369-
filename = filename.decode(self.encoding)
370-
except UnicodeDecodeError:
371-
# byte-to-byte conversion
372-
filename = filename.decode('iso8859-1')
373-
# Replace non-BMP char with diamond questionmark.
374-
return re.sub('[\U00010000-\U0010FFFF]', '\ufffd', filename)
375-
376361
def new_callback(self, event):
377362
dirname, basename = self.io.defaultfilename()
378363
self.flist.new(dirname)
@@ -963,10 +948,8 @@ def update_recent_files_list(self, new_file=None):
963948
menu.delete(0, END) # clear, and rebuild:
964949
for i, file_name in enumerate(rf_list):
965950
file_name = file_name.rstrip() # zap \n
966-
# make unicode string to display non-ASCII chars correctly
967-
ufile_name = self._filename_to_unicode(file_name)
968951
callback = instance.__recent_file_callback(file_name)
969-
menu.add_command(label=ulchars[i] + " " + ufile_name,
952+
menu.add_command(label=ulchars[i] + " " + file_name,
970953
command=callback,
971954
underline=0)
972955

@@ -1004,16 +987,10 @@ def reset_undo(self):
1004987

1005988
def short_title(self):
1006989
filename = self.io.filename
1007-
if filename:
1008-
filename = os.path.basename(filename)
1009-
else:
1010-
filename = "untitled"
1011-
# return unicode string to display non-ASCII chars correctly
1012-
return self._filename_to_unicode(filename)
990+
return os.path.basename(filename) if filename else "untitled"
1013991

1014992
def long_title(self):
1015-
# return unicode string to display non-ASCII chars correctly
1016-
return self._filename_to_unicode(self.io.filename or "")
993+
return self.io.filename or ""
1017994

1018995
def center_insert_event(self, event):
1019996
self.center()

Lib/idlelib/idle_test/test_editor.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,6 @@ def test_init(self):
3030
e._close()
3131

3232

33-
class EditorFunctionTest(unittest.TestCase):
34-
35-
def test_filename_to_unicode(self):
36-
func = Editor._filename_to_unicode
37-
class dummy():
38-
filesystemencoding = 'utf-8'
39-
pairs = (('abc', 'abc'), ('a\U00011111c', 'a\ufffdc'),
40-
(b'abc', 'abc'), (b'a\xf0\x91\x84\x91c', 'a\ufffdc'))
41-
for inp, out in pairs:
42-
self.assertEqual(func(dummy, inp), out)
43-
44-
4533
class TestGetLineIndent(unittest.TestCase):
4634
def test_empty_lines(self):
4735
for tabwidth in [1, 2, 4, 6, 8]:

Lib/idlelib/pyshell.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -679,14 +679,6 @@ def runsource(self, source):
679679
self.more = 0
680680
# at the moment, InteractiveInterpreter expects str
681681
assert isinstance(source, str)
682-
#if isinstance(source, str):
683-
# from idlelib import iomenu
684-
# try:
685-
# source = source.encode(iomenu.encoding)
686-
# except UnicodeError:
687-
# self.tkconsole.resetoutput()
688-
# self.write("Unsupported characters in input\n")
689-
# return
690682
# InteractiveInterpreter.runsource() calls its runcode() method,
691683
# which is overridden (see below)
692684
return InteractiveInterpreter.runsource(self, source, filename)
@@ -1298,16 +1290,6 @@ def resetoutput(self):
12981290
self.set_line_and_column()
12991291

13001292
def write(self, s, tags=()):
1301-
if isinstance(s, str) and len(s) and max(s) > '\uffff':
1302-
# Tk doesn't support outputting non-BMP characters
1303-
# Let's assume what printed string is not very long,
1304-
# find first non-BMP character and construct informative
1305-
# UnicodeEncodeError exception.
1306-
for start, char in enumerate(s):
1307-
if char > '\uffff':
1308-
break
1309-
raise UnicodeEncodeError("UCS-2", char, start, start+1,
1310-
'Non-BMP character not supported in Tk')
13111293
try:
13121294
self.text.mark_gravity("iomark", "right")
13131295
count = OutputWindow.write(self, s, tags, "iomark")

Lib/idlelib/runscript.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,7 @@ def _run_module_event(self, event, *, customize=False):
147147
interp = self.shell.interp
148148
if pyshell.use_subprocess and restart:
149149
interp.restart_subprocess(
150-
with_cwd=False, filename=
151-
self.editwin._filename_to_unicode(filename))
150+
with_cwd=False, filename=filename)
152151
dirname = os.path.dirname(filename)
153152
argv = [filename]
154153
if self.cli_args:

Lib/test/test_tcl.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,12 @@ def passValue(value):
429429
self.assertEqual(passValue(False), False if self.wantobjects else '0')
430430
self.assertEqual(passValue('string'), 'string')
431431
self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
432+
self.assertEqual(passValue('string\U0001f4bb'), 'string\U0001f4bb')
432433
self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
433434
self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd')
434435
self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
436+
self.assertEqual(passValue('str\x00ing\U0001f4bb'),
437+
'str\x00ing\U0001f4bb')
435438
self.assertEqual(passValue(b'str\x00ing'),
436439
b'str\x00ing' if self.wantobjects else 'str\x00ing')
437440
self.assertEqual(passValue(b'str\xc0\x80ing'),
@@ -490,6 +493,7 @@ def float_eq(actual, expected):
490493
check('string')
491494
check('string\xbd')
492495
check('string\u20ac')
496+
check('string\U0001f4bb')
493497
check('')
494498
check(b'string', 'string')
495499
check(b'string\xe2\x82\xac', 'string\xe2\x82\xac')
@@ -531,6 +535,7 @@ def test_splitlist(self):
531535
('a\n b\t\r c\n ', ('a', 'b', 'c')),
532536
(b'a\n b\t\r c\n ', ('a', 'b', 'c')),
533537
('a \u20ac', ('a', '\u20ac')),
538+
('a \U0001f4bb', ('a', '\U0001f4bb')),
534539
(b'a \xe2\x82\xac', ('a', '\u20ac')),
535540
(b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
536541
('a {b c}', ('a', 'b c')),

Lib/tkinter/test/test_tkinter/test_misc.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,28 @@ def callback():
156156
with self.assertRaises(tkinter.TclError):
157157
root.tk.call('after', 'info', idle1)
158158

159+
def test_clipboard(self):
160+
root = self.root
161+
root.clipboard_clear()
162+
root.clipboard_append('Ùñî')
163+
self.assertEqual(root.clipboard_get(), 'Ùñî')
164+
root.clipboard_append('çōđě')
165+
self.assertEqual(root.clipboard_get(), 'Ùñîçōđě')
166+
root.clipboard_clear()
167+
with self.assertRaises(tkinter.TclError):
168+
root.clipboard_get()
169+
170+
def test_clipboard_astral(self):
171+
root = self.root
172+
root.clipboard_clear()
173+
root.clipboard_append('𝔘𝔫𝔦')
174+
self.assertEqual(root.clipboard_get(), '𝔘𝔫𝔦')
175+
root.clipboard_append('𝔠𝔬𝔡𝔢')
176+
self.assertEqual(root.clipboard_get(), '𝔘𝔫𝔦𝔠𝔬𝔡𝔢')
177+
root.clipboard_clear()
178+
with self.assertRaises(tkinter.TclError):
179+
root.clipboard_get()
180+
159181

160182
tests_gui = (MiscTest, )
161183

Lib/tkinter/test/test_ttk/test_widgets.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -489,8 +489,7 @@ def check_get_current(getval, currval):
489489
expected=('mon', 'tue', 'wed', 'thur'))
490490
self.checkParam(self.combo, 'values', ('mon', 'tue', 'wed', 'thur'))
491491
self.checkParam(self.combo, 'values', (42, 3.14, '', 'any string'))
492-
self.checkParam(self.combo, 'values', '',
493-
expected='' if get_tk_patchlevel() < (8, 5, 10) else ())
492+
self.checkParam(self.combo, 'values', '')
494493

495494
self.combo['values'] = ['a', 1, 'c']
496495

@@ -1245,12 +1244,7 @@ def test_values(self):
12451244
expected=('mon', 'tue', 'wed', 'thur'))
12461245
self.checkParam(self.spin, 'values', ('mon', 'tue', 'wed', 'thur'))
12471246
self.checkParam(self.spin, 'values', (42, 3.14, '', 'any string'))
1248-
self.checkParam(
1249-
self.spin,
1250-
'values',
1251-
'',
1252-
expected='' if get_tk_patchlevel() < (8, 5, 10) else ()
1253-
)
1247+
self.checkParam(self.spin, 'values', '')
12541248

12551249
self.spin['values'] = ['a', 1, 'c']
12561250

@@ -1308,8 +1302,7 @@ def test_columns(self):
13081302
self.checkParam(widget, 'columns', 'a b c',
13091303
expected=('a', 'b', 'c'))
13101304
self.checkParam(widget, 'columns', ('a', 'b', 'c'))
1311-
self.checkParam(widget, 'columns', (),
1312-
expected='' if get_tk_patchlevel() < (8, 5, 10) else ())
1305+
self.checkParam(widget, 'columns', '')
13131306

13141307
def test_displaycolumns(self):
13151308
widget = self.create()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
OS native encoding is now used for converting between Python strings and
2+
Tcl objects. This makes it possible to display emoji and other non-BMP
3+
characters and to copy and paste them via the clipboard. Converting strings from Tcl to Python and back
4+
now never fails (except MemoryError).

0 commit comments

Comments
 (0)