28
28
POSSESSIVE_REPEAT : (POSSESSIVE_REPEAT , SUCCESS , POSSESSIVE_REPEAT_ONE ),
29
29
}
30
30
31
- class _CompileData :
32
- __slots__ = ('code' , 'repeat_count' )
33
- def __init__ (self ):
34
- self .code = []
35
- self .repeat_count = 0
31
+ # Sets of lowercase characters which have the same uppercase.
32
+ _equivalences = (
33
+ # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I
34
+ (0x69 , 0x131 ), # iı
35
+ # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S
36
+ (0x73 , 0x17f ), # sſ
37
+ # MICRO SIGN, GREEK SMALL LETTER MU
38
+ (0xb5 , 0x3bc ), # µμ
39
+ # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI
40
+ (0x345 , 0x3b9 , 0x1fbe ), # \u0345ιι
41
+ # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
42
+ (0x390 , 0x1fd3 ), # ΐΐ
43
+ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
44
+ (0x3b0 , 0x1fe3 ), # ΰΰ
45
+ # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL
46
+ (0x3b2 , 0x3d0 ), # βϐ
47
+ # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL
48
+ (0x3b5 , 0x3f5 ), # εϵ
49
+ # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL
50
+ (0x3b8 , 0x3d1 ), # θϑ
51
+ # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL
52
+ (0x3ba , 0x3f0 ), # κϰ
53
+ # GREEK SMALL LETTER PI, GREEK PI SYMBOL
54
+ (0x3c0 , 0x3d6 ), # πϖ
55
+ # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL
56
+ (0x3c1 , 0x3f1 ), # ρϱ
57
+ # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA
58
+ (0x3c2 , 0x3c3 ), # ςσ
59
+ # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL
60
+ (0x3c6 , 0x3d5 ), # φϕ
61
+ # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE
62
+ (0x1e61 , 0x1e9b ), # ṡẛ
63
+ # LATIN SMALL LIGATURE LONG S T, LATIN SMALL LIGATURE ST
64
+ (0xfb05 , 0xfb06 ), # ſtst
65
+ )
66
+
67
+ # Maps the lowercase code to lowercase codes which have the same uppercase.
68
+ _ignorecase_fixes = {i : tuple (j for j in t if i != j )
69
+ for t in _equivalences for i in t }
36
70
37
71
def _combine_flags (flags , add_flags , del_flags ,
38
72
TYPE_FLAGS = _parser .TYPE_FLAGS ):
39
73
if add_flags & TYPE_FLAGS :
40
74
flags &= ~ TYPE_FLAGS
41
75
return (flags | add_flags ) & ~ del_flags
42
76
43
- def _compile (data , pattern , flags ):
77
+ def _compile (code , pattern , flags ):
44
78
# internal: compile a (sub)pattern
45
- code = data .code
46
79
emit = code .append
47
80
_len = len
48
81
LITERAL_CODES = _LITERAL_CODES
@@ -115,19 +148,15 @@ def _compile(data, pattern, flags):
115
148
skip = _len (code ); emit (0 )
116
149
emit (av [0 ])
117
150
emit (av [1 ])
118
- _compile (data , av [2 ], flags )
151
+ _compile (code , av [2 ], flags )
119
152
emit (SUCCESS )
120
153
code [skip ] = _len (code ) - skip
121
154
else :
122
155
emit (REPEATING_CODES [op ][0 ])
123
156
skip = _len (code ); emit (0 )
124
157
emit (av [0 ])
125
158
emit (av [1 ])
126
- # now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
127
- if op != POSSESSIVE_REPEAT :
128
- emit (data .repeat_count )
129
- data .repeat_count += 1
130
- _compile (data , av [2 ], flags )
159
+ _compile (code , av [2 ], flags )
131
160
code [skip ] = _len (code ) - skip
132
161
emit (REPEATING_CODES [op ][1 ])
133
162
elif op is SUBPATTERN :
@@ -136,7 +165,7 @@ def _compile(data, pattern, flags):
136
165
emit (MARK )
137
166
emit ((group - 1 )* 2 )
138
167
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
139
- _compile (data , p , _combine_flags (flags , add_flags , del_flags ))
168
+ _compile (code , p , _combine_flags (flags , add_flags , del_flags ))
140
169
if group :
141
170
emit (MARK )
142
171
emit ((group - 1 )* 2 + 1 )
@@ -148,7 +177,7 @@ def _compile(data, pattern, flags):
148
177
# pop their stack if they reach it
149
178
emit (ATOMIC_GROUP )
150
179
skip = _len (code ); emit (0 )
151
- _compile (data , av , flags )
180
+ _compile (code , av , flags )
152
181
emit (SUCCESS )
153
182
code [skip ] = _len (code ) - skip
154
183
elif op in SUCCESS_CODES :
@@ -163,7 +192,7 @@ def _compile(data, pattern, flags):
163
192
if lo != hi :
164
193
raise error ("look-behind requires fixed-width pattern" )
165
194
emit (lo ) # look behind
166
- _compile (data , av [1 ], flags )
195
+ _compile (code , av [1 ], flags )
167
196
emit (SUCCESS )
168
197
code [skip ] = _len (code ) - skip
169
198
elif op is AT :
@@ -182,7 +211,7 @@ def _compile(data, pattern, flags):
182
211
for av in av [1 ]:
183
212
skip = _len (code ); emit (0 )
184
213
# _compile_info(code, av, flags)
185
- _compile (data , av , flags )
214
+ _compile (code , av , flags )
186
215
emit (JUMP )
187
216
tailappend (_len (code )); emit (0 )
188
217
code [skip ] = _len (code ) - skip
@@ -210,12 +239,12 @@ def _compile(data, pattern, flags):
210
239
emit (op )
211
240
emit (av [0 ]- 1 )
212
241
skipyes = _len (code ); emit (0 )
213
- _compile (data , av [1 ], flags )
242
+ _compile (code , av [1 ], flags )
214
243
if av [2 ]:
215
244
emit (JUMP )
216
245
skipno = _len (code ); emit (0 )
217
246
code [skipyes ] = _len (code ) - skipyes + 1
218
- _compile (data , av [2 ], flags )
247
+ _compile (code , av [2 ], flags )
219
248
code [skipno ] = _len (code ) - skipno
220
249
else :
221
250
code [skipyes ] = _len (code ) - skipyes + 1
@@ -582,17 +611,17 @@ def isstring(obj):
582
611
def _code (p , flags ):
583
612
584
613
flags = p .state .flags | flags
585
- data = _CompileData ()
614
+ code = []
586
615
587
616
# compile info block
588
- _compile_info (data . code , p , flags )
617
+ _compile_info (code , p , flags )
589
618
590
619
# compile the pattern
591
- _compile (data , p .data , flags )
620
+ _compile (code , p .data , flags )
592
621
593
- data . code .append (SUCCESS )
622
+ code .append (SUCCESS )
594
623
595
- return data
624
+ return code
596
625
597
626
def _hex_code (code ):
598
627
return '[%s]' % ', ' .join ('%#0*x' % (_sre .CODESIZE * 2 + 2 , x ) for x in code )
@@ -693,21 +722,14 @@ def print_2(*args):
693
722
else :
694
723
print_ (FAILURE )
695
724
i += 1
696
- elif op in (REPEAT_ONE , MIN_REPEAT_ONE ,
725
+ elif op in (REPEAT , REPEAT_ONE , MIN_REPEAT_ONE ,
697
726
POSSESSIVE_REPEAT , POSSESSIVE_REPEAT_ONE ):
698
727
skip , min , max = code [i : i + 3 ]
699
728
if max == MAXREPEAT :
700
729
max = 'MAXREPEAT'
701
730
print_ (op , skip , min , max , to = i + skip )
702
731
dis_ (i + 3 , i + skip )
703
732
i += skip
704
- elif op is REPEAT :
705
- skip , min , max , repeat_index = code [i : i + 4 ]
706
- if max == MAXREPEAT :
707
- max = 'MAXREPEAT'
708
- print_ (op , skip , min , max , repeat_index , to = i + skip )
709
- dis_ (i + 4 , i + skip )
710
- i += skip
711
733
elif op is GROUPREF_EXISTS :
712
734
arg , skip = code [i : i + 2 ]
713
735
print_ (op , arg , skip , to = i + skip )
@@ -762,11 +784,11 @@ def compile(p, flags=0):
762
784
else :
763
785
pattern = None
764
786
765
- data = _code (p , flags )
787
+ code = _code (p , flags )
766
788
767
789
if flags & SRE_FLAG_DEBUG :
768
790
print ()
769
- dis (data . code )
791
+ dis (code )
770
792
771
793
# map in either direction
772
794
groupindex = p .state .groupdict
@@ -775,6 +797,7 @@ def compile(p, flags=0):
775
797
indexgroup [i ] = k
776
798
777
799
return _sre .compile (
778
- pattern , flags | p .state .flags , data .code ,
779
- p .state .groups - 1 , groupindex , tuple (indexgroup ),
780
- data .repeat_count )
800
+ pattern , flags | p .state .flags , code ,
801
+ p .state .groups - 1 ,
802
+ groupindex , tuple (indexgroup )
803
+ )
0 commit comments