@@ -1,6 +1,6 @@
 from test import support
 from test.support import os_helper
-from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
+from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
@@ -51,6 +51,25 @@ def check_tokenize(self, s, expected):
                          ["    ENCODING 'utf-8' (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())

+    def test_invalid_readline(self):
+        def gen():
+            yield "sdfosdg"
+            yield "sdfosdg"
+        with self.assertRaises(TypeError):
+            list(tokenize(gen().__next__))
+
+        def gen():
+            yield b"sdfosdg"
+            yield b"sdfosdg"
+        with self.assertRaises(TypeError):
+            list(generate_tokens(gen().__next__))
+
+        def gen():
+            yield "sdfosdg"
+            1/0
+        with self.assertRaises(ZeroDivisionError):
+            list(generate_tokens(gen().__next__))
+
     def test_implicit_newline(self):
         # Make sure that the tokenizer puts in an implicit NEWLINE
         # when the input lacks a trailing new line.
@@ -1154,7 +1173,8 @@ class TestTokenizerAdheresToPep0263(TestCase):

     def _testFile(self, filename):
         path = os.path.join(os.path.dirname(__file__), filename)
-        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
+        with open(path, 'rb') as f:
+            TestRoundtrip.check_roundtrip(self, f)

     def test_utf8_coding_cookie_and_no_utf8_bom(self):
         f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
@@ -1199,7 +1219,8 @@ def readline():
             yield b''

         # skip the initial encoding token and the end tokens
-        tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
+        tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
+                                                        extra_tokens=True))[:-2]
         expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
@@ -1468,13 +1489,13 @@ def test_tokenize(self):
         def mock_detect_encoding(readline):
             return encoding, [b'first', b'second']

-        def mock__tokenize(readline, encoding):
+        def mock__tokenize(readline, encoding, **kwargs):
             nonlocal encoding_used
             encoding_used = encoding
             out = []
             while True:
                 try:
-                    next_line = next(readline)
+                    next_line = readline()
                 except StopIteration:
                     return out
                 if next_line:
@@ -1491,16 +1512,16 @@ def mock_readline():
             return str(counter).encode()

         orig_detect_encoding = tokenize_module.detect_encoding
-        orig__tokenize = tokenize_module._tokenize
+        orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
         tokenize_module.detect_encoding = mock_detect_encoding
-        tokenize_module._tokenize = mock__tokenize
+        tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
         try:
             results = tokenize(mock_readline)
             self.assertEqual(list(results)[1:],
                              [b'first', b'second', b'1', b'2', b'3', b'4'])
         finally:
             tokenize_module.detect_encoding = orig_detect_encoding
-            tokenize_module._tokenize = orig__tokenize
+            tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token

         self.assertEqual(encoding_used, encoding)

@@ -1827,12 +1848,33 @@ class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
+        f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(s), s
+                _generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())

+    def test_encoding(self):
+        def readline(encoding):
+            yield "1+1".encode(encoding)
+
+        expected = [
+            TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
+            TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
+            TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
+            TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
+            TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+        for encoding in ["utf-8", "latin-1", "utf-16"]:
+            with self.subTest(encoding=encoding):
+                tokens = list(_generate_tokens_from_c_tokenizer(
+                    readline(encoding).__next__,
+                    extra_tokens=True,
+                    encoding=encoding,
+                ))
+                self.assertEqual(tokens, expected)
+
     def test_int(self):

         self.check_tokenize('0xff <= 255', """\
@@ -2668,43 +2710,44 @@ def test_unicode(self):

     def test_invalid_syntax(self):
         def get_tokens(string):
-            return list(_generate_tokens_from_c_tokenizer(string))
-
-        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
-        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
-        self.assertRaises(SyntaxError, get_tokens, "{1+2]")
-
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "\xa0")
-        self.assertRaises(SyntaxError, get_tokens, "€")
-
-        self.assertRaises(SyntaxError, get_tokens, "0b12")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
-        self.assertRaises(SyntaxError, get_tokens, "0b2")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_")
-        self.assertRaises(SyntaxError, get_tokens, "0b")
-        self.assertRaises(SyntaxError, get_tokens, "0o18")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
-        self.assertRaises(SyntaxError, get_tokens, "0o8")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_")
-        self.assertRaises(SyntaxError, get_tokens, "0o")
-        self.assertRaises(SyntaxError, get_tokens, "0x1_")
-        self.assertRaises(SyntaxError, get_tokens, "0x")
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "012")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
-        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
-
-        self.assertRaises(SyntaxError, get_tokens, "(" * 1000 + "a" + ")" * 1000)
-        self.assertRaises(SyntaxError, get_tokens, "]")
+            the_string = StringIO(string)
+            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+
+        for case in [
+            "(1+2]",
+            "(1+2}",
+            "{1+2]",
+            "1_",
+            "1.2_",
+            "1e2_",
+            "1e+",
+
+            "\xa0",
+            "€",
+            "0b12",
+            "0b1_2",
+            "0b2",
+            "0b1_",
+            "0b",
+            "0o18",
+            "0o1_8",
+            "0o8",
+            "0o1_",
+            "0o",
+            "0x1_",
+            "0x",
+            "1_",
+            "012",
+            "1.2_",
+            "1e2_",
+            "1e+",
+            "'sdfsdf",
+            "'''sdfsdf''",
+            "(" * 1000 + "a" + ")" * 1000,
+            "]",
+        ]:
+            with self.subTest(case=case):
+                self.assertRaises(SyntaxError, get_tokens, case)

     def test_max_indent(self):
         MAXINDENT = 100
@@ -2715,20 +2758,24 @@ def generate_source(indents):
             return source

         valid = generate_source(MAXINDENT - 1)
-        tokens = list(_generate_tokens_from_c_tokenizer(valid))
+        the_input = StringIO(valid)
+        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
         self.assertEqual(tokens[-2].type, DEDENT)
         self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")

         invalid = generate_source(MAXINDENT)
-        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(invalid)))
+        the_input = StringIO(invalid)
+        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )

     def test_continuation_lines_indentation(self):
         def get_tokens(string):
-            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
+            the_string = StringIO(string)
+            return [(kind, string) for (kind, string, *_)
+                    in _generate_tokens_from_c_tokenizer(the_string.readline)]

         code = dedent("""
             def fib(n):
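Note: the changed tests above all drive the tokenizer through a readline callable instead of a plain string. A minimal sketch of that calling convention for the public entry points exercised here (illustrative only, not part of the diff; the source text and variable names are made up):

import io
from tokenize import tokenize, generate_tokens

source = "x = 1 + 2\n"

# tokenize() expects a readline callable that returns bytes;
# the encoding is detected from the byte stream itself.
for tok in tokenize(io.BytesIO(source.encode("utf-8")).readline):
    print(tok)

# generate_tokens() expects a readline callable that returns str.
for tok in generate_tokens(io.StringIO(source).readline):
    print(tok)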