1
1
import sys
2
2
import ffilib
3
3
import array
4
+ import uctypes
4
5
6
# Handle to the PCRE2 shared library.  Every symbol bound below carries the
# "_8" suffix that this library uses for its exported functions.
pcre2 = ffilib.open("libpcre2-8")

# NOTE(review): the prototypes quoted below declare several parameters as
# PCRE2_SIZE, while the FFI signatures describe them as "i" -- confirm this
# is intended on 64-bit targets.

# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
#       uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
#       pcre2_compile_context *ccontext);
pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")

# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
#       PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
#       pcre2_match_data *match_data, pcre2_match_context *mcontext);
# The return-type, symbol-name and argument strings must not contain any
# padding whitespace: they are passed to the FFI layer verbatim.
pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")

# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
#       void *where);
pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")

# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")

# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
#       pcre2_general_context *gcontext);
pcre2_match_data_create_from_pattern = pcre2.func(
    "p", "pcre2_match_data_create_from_pattern_8", "Pp"
)
22
30
23
- IGNORECASE = I = 1
24
- MULTILINE = M = 2
25
- DOTALL = S = 4
26
- VERBOSE = X = 8
27
- PCRE_ANCHORED = 0x10
31
# PCRE2_SIZE is a size_t.
# It is described as ULONG so that both 32-bit and 64-bit targets are
# supported.
PCRE2_SIZE_SIZE = uctypes.sizeof({"field": uctypes.ULONG | 0})
PCRE2_SIZE_TYPE = "L"

# The real value in pcre2.h is 0xFFFFFFFF on 32-bit and
# 0xFFFFFFFFFFFFFFFF on 64-bit, which is equivalent to -1.
PCRE2_ZERO_TERMINATED = ~0
40
+
41
+
42
# Public flag names and their one-letter aliases.  The values are PCRE2
# option bits, so they can be handed to the library without translation.
I = IGNORECASE = 1 << 3
M = MULTILINE = 1 << 10
S = DOTALL = 1 << 5
X = VERBOSE = 1 << 7
# Internal option bit used by the match() entry points.
PCRE2_ANCHORED = 1 << 31

# TODO. Note that Python3 has unicode by default
A = ASCII = 0
U = UNICODE = 0

# "what" selector passed to pcre2_pattern_info() to read the capture count.
PCRE2_INFO_CAPTURECOUNT = 4
34
53
35
54
36
55
class PCREMatch :
@@ -67,19 +86,23 @@ def __init__(self, compiled_ptn):
67
86
def search(self, s, pos=0, endpos=-1, _flags=0):
    """Look for the first match of this pattern in ``s``.

    Args:
        s: subject to scan.
        pos: offset at which scanning starts.
        endpos: not implemented; anything other than -1 trips the assert.
        _flags: option bits handed to pcre2_match() unchanged.

    Returns:
        A PCREMatch on success, or None when there is no match (including
        when ``pos`` lies beyond the end of ``s``).

    Raises:
        RuntimeError: pcre2_match() reported an error other than "no match".
    """
    assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
    # Nothing can match past the end of the subject; answer directly rather
    # than letting the library reject the start offset.
    if pos > len(s):
        return None
    buf = array.array("i", [0])
    pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
    cap_count = buf[0]
    # NOTE(review): match_data is allocated on every call and never released
    # here; no pcre2_match_data_free binding is visible in this file.
    match_data = pcre2_match_data_create_from_pattern(self.obj, None)
    num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
    if num == -1:
        # No match
        return None
    if num < 0:
        # Any other negative value is an error code; the ovector contents
        # are not meaningful in that case, so do not build a match from it.
        raise RuntimeError("pcre2_match failed: %d" % num)
    ov_ptr = pcre2_get_ovector_pointer(match_data)
    # pcre2_get_ovector_pointer returns a pointer to PCRE2_SIZE values:
    # two entries for each of the cap_count + 1 groups.
    ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2)
    ov = array.array(PCRE2_SIZE_TYPE, ov_buf)
    # We don't care how many matching subexpressions we got, we
    # care only about total # of capturing ones (including empty)
    return PCREMatch(s, cap_count + 1, ov)
80
103
81
104
def match(self, s, pos=0, endpos=-1):
    """Run search() on ``s`` with the PCRE2_ANCHORED option set.

    ``pos`` and ``endpos`` are forwarded to search() unchanged; the return
    value is whatever search() returns.
    """
    anchored_result = self.search(s, pos, endpos, PCRE2_ANCHORED)
    return anchored_result
83
106
84
107
def sub (self , repl , s , count = 0 ):
85
108
if not callable (repl ):
@@ -141,9 +164,9 @@ def findall(self, s):
141
164
142
165
143
166
def compile(pattern, flags=0):
    """Compile ``pattern`` with PCRE2 and wrap the result in a PCREPattern.

    Args:
        pattern: regular expression source, passed as a zero-terminated string.
        flags: option bits handed to pcre2_compile() unchanged.

    Returns:
        A PCREPattern holding the compiled code pointer.

    Raises:
        AssertionError: pcre2_compile() returned a null pointer; the message
            carries the error code and pattern offset it reported.
    """
    # pcre2_compile() writes through both out-parameters, so they must be
    # writable buffers of the right width: errorcode is an int*, while
    # erroroffset is a PCRE2_SIZE* (wider than 4 bytes on 64-bit targets).
    errcode = array.array("i", [0])
    erroffset = array.array(PCRE2_SIZE_TYPE, [0])
    regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
    assert regex, "pcre2_compile failed: error %d at offset %d" % (errcode[0], erroffset[0])
    return PCREPattern(regex)
149
172
@@ -154,7 +177,7 @@ def search(pattern, string, flags=0):
154
177
155
178
156
179
def match(pattern, string, flags=0):
    """Compile ``pattern`` with PCRE2_ANCHORED added to ``flags`` and search ``string``.

    Returns whatever the compiled pattern's search() returns.
    """
    return compile(pattern, flags | PCRE2_ANCHORED).search(string)
159
182
160
183
0 commit comments