Skip to content

Commit 391ca5d

Browse files
authored
Merge pull request #15390 from Marcono1234/marcono1234/python-ascii-regex-flag
2 parents 39b32a9 + 1ad08ef commit 391ca5d

File tree

9 files changed: 124 additions and 11 deletions.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: fix
3+
---
4+
* Fixed the `a` (ASCII) inline flag not being recognized by the regular expression library.

python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll

+13-10
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,14 @@ class RegExp extends Expr instanceof StrConst {
116116

117117
/**
118118
* Gets a mode (if any) of this regular expression. Can be any of:
119-
* DEBUG
120-
* IGNORECASE
121-
* LOCALE
122-
* MULTILINE
123-
* DOTALL
124-
* UNICODE
125-
* VERBOSE
119+
* - DEBUG
120+
* - ASCII
121+
* - IGNORECASE
122+
* - LOCALE
123+
* - MULTILINE
124+
* - DOTALL
125+
* - UNICODE
126+
* - VERBOSE
126127
*/
127128
string getAMode() {
128129
result = FindRegexMode::getAMode(this)
@@ -705,19 +706,19 @@ class RegExp extends Expr instanceof StrConst {
705706
private predicate flag_group_start_no_modes(int start, int end) {
706707
this.isGroupStart(start) and
707708
this.getChar(start + 1) = "?" and
708-
this.getChar(start + 2) in ["i", "L", "m", "s", "u", "x"] and
709+
this.getChar(start + 2) in ["a", "i", "L", "m", "s", "u", "x"] and
709710
end = start + 2
710711
}
711712

712713
/**
713-
* Holds if `pos` contains a mo character from the
714+
* Holds if `pos` contains a mode character from the
714715
* flag group starting at `start`.
715716
*/
716717
private predicate mode_character(int start, int pos) {
717718
this.flag_group_start_no_modes(start, pos)
718719
or
719720
this.mode_character(start, pos - 1) and
720-
this.getChar(pos) in ["i", "L", "m", "s", "u", "x"]
721+
this.getChar(pos) in ["a", "i", "L", "m", "s", "u", "x"]
721722
}
722723

723724
/**
@@ -740,6 +741,8 @@ class RegExp extends Expr instanceof StrConst {
740741
*/
741742
string getModeFromPrefix() {
742743
exists(string c | this.flag(c) |
744+
c = "a" and result = "ASCII"
745+
or
743746
c = "i" and result = "IGNORECASE"
744747
or
745748
c = "L" and result = "LOCALE"

python/ql/test/library-tests/regex/Characters.expected

+19
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@
2020
| (?!not-this)^[A-Z_]+$ | 16 | 17 |
2121
| (?!not-this)^[A-Z_]+$ | 17 | 18 |
2222
| (?!not-this)^[A-Z_]+$ | 20 | 21 |
23+
| (?-imsx:a+) | 2 | 3 |
24+
| (?-imsx:a+) | 3 | 4 |
25+
| (?-imsx:a+) | 4 | 5 |
26+
| (?-imsx:a+) | 5 | 6 |
27+
| (?-imsx:a+) | 6 | 7 |
28+
| (?-imsx:a+) | 7 | 8 |
29+
| (?-imsx:a+) | 8 | 9 |
2330
| (?:(?:\n\r?)\|^)( *)\\S | 6 | 7 |
2431
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 8 |
2532
| (?:(?:\n\r?)\|^)( *)\\S | 11 | 12 |
@@ -35,9 +42,21 @@
3542
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 19 | 21 |
3643
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 22 | 23 |
3744
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 24 | 25 |
45+
| (?Li)a+ | 5 | 6 |
3846
| (?P<name>[\\w]+)\| | 10 | 12 |
47+
| (?a-imsx:a+) | 3 | 4 |
48+
| (?a-imsx:a+) | 4 | 5 |
49+
| (?a-imsx:a+) | 5 | 6 |
50+
| (?a-imsx:a+) | 6 | 7 |
51+
| (?a-imsx:a+) | 7 | 8 |
52+
| (?a-imsx:a+) | 8 | 9 |
53+
| (?a-imsx:a+) | 9 | 10 |
54+
| (?aimsx)a+ | 8 | 9 |
55+
| (?aimsx:a+) | 7 | 8 |
56+
| (?aimsx:a+) | 8 | 9 |
3957
| (?m)^(?!$) | 4 | 5 |
4058
| (?m)^(?!$) | 8 | 9 |
59+
| (?ui)a+ | 5 | 6 |
4160
| (\\033\|~{) | 1 | 5 |
4261
| (\\033\|~{) | 6 | 7 |
4362
| (\\033\|~{) | 7 | 8 |

python/ql/test/library-tests/regex/FirstLast.expected

+18
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,32 @@
1818
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 8 | 9 |
1919
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 11 | 12 |
2020
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | last | 21 | 26 |
21+
| (?Li)a+ | first | 5 | 6 |
22+
| (?Li)a+ | first | 5 | 7 |
23+
| (?Li)a+ | last | 5 | 6 |
24+
| (?Li)a+ | last | 5 | 7 |
2125
| (?P<name>[\\w]+)\| | first | 9 | 13 |
2226
| (?P<name>[\\w]+)\| | first | 9 | 14 |
2327
| (?P<name>[\\w]+)\| | last | 9 | 13 |
2428
| (?P<name>[\\w]+)\| | last | 9 | 14 |
29+
| (?a-imsx:a+) | first | 3 | 9 |
30+
| (?a-imsx:a+) | last | 9 | 10 |
31+
| (?a-imsx:a+) | last | 9 | 11 |
32+
| (?aimsx)a+ | first | 8 | 9 |
33+
| (?aimsx)a+ | first | 8 | 10 |
34+
| (?aimsx)a+ | last | 8 | 9 |
35+
| (?aimsx)a+ | last | 8 | 10 |
36+
| (?aimsx:a+) | first | 7 | 8 |
37+
| (?aimsx:a+) | last | 8 | 9 |
38+
| (?aimsx:a+) | last | 8 | 10 |
2539
| (?m)^(?!$) | first | 4 | 5 |
2640
| (?m)^(?!$) | first | 8 | 9 |
2741
| (?m)^(?!$) | last | 4 | 5 |
2842
| (?m)^(?!$) | last | 8 | 9 |
43+
| (?ui)a+ | first | 5 | 6 |
44+
| (?ui)a+ | first | 5 | 7 |
45+
| (?ui)a+ | last | 5 | 6 |
46+
| (?ui)a+ | last | 5 | 7 |
2947
| (\\033\|~{) | first | 1 | 5 |
3048
| (\\033\|~{) | first | 6 | 8 |
3149
| (\\033\|~{) | last | 1 | 5 |

python/ql/test/library-tests/regex/GroupContents.expected

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 10 | (?:[^%]\|^) | 3 | 9 | [^%]\|^ |
99
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 14 | 19 | (\\w*) | 15 | 18 | \\w* |
1010
| (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) | 9 | 14 | [\\w]+ |
11+
| (?a-imsx:a+) | 0 | 12 | (?a-imsx:a+) | 3 | 11 | -imsx:a+ |
12+
| (?aimsx:a+) | 0 | 11 | (?aimsx:a+) | 7 | 10 | :a+ |
1113
| (?m)^(?!$) | 5 | 10 | (?!$) | 8 | 9 | $ |
1214
| (\\033\|~{) | 0 | 9 | (\\033\|~{) | 1 | 8 | \\033\|~{ |
1315
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 2 | 16 | (?P<txt>[^[]*) | 10 | 15 | [^[]* |

python/ql/test/library-tests/regex/Mode.expected

+16-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,19 @@
1010
| 54 | DOTALL |
1111
| 54 | VERBOSE |
1212
| 56 | VERBOSE |
13-
| 68 | MULTILINE |
13+
| 59 | ASCII |
14+
| 59 | DOTALL |
15+
| 59 | IGNORECASE |
16+
| 59 | MULTILINE |
17+
| 59 | VERBOSE |
18+
| 60 | IGNORECASE |
19+
| 60 | UNICODE |
20+
| 61 | IGNORECASE |
21+
| 61 | LOCALE |
22+
| 63 | ASCII |
23+
| 63 | DOTALL |
24+
| 63 | IGNORECASE |
25+
| 63 | MULTILINE |
26+
| 63 | VERBOSE |
27+
| 65 | ASCII |
28+
| 77 | MULTILINE |

python/ql/test/library-tests/regex/Qualified.expected

+6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
| (?!not-this)^[A-Z_]+$ | 13 | 20 | false | true |
2+
| (?-imsx:a+) | 8 | 10 | false | true |
23
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true | false |
34
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true | true |
45
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true | false |
56
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true | true |
7+
| (?Li)a+ | 5 | 7 | false | true |
68
| (?P<name>[\\w]+)\| | 9 | 14 | false | true |
9+
| (?a-imsx:a+) | 9 | 11 | false | true |
10+
| (?aimsx)a+ | 8 | 10 | false | true |
11+
| (?aimsx:a+) | 8 | 10 | false | true |
12+
| (?ui)a+ | 5 | 7 | false | true |
713
| \\A[+-]?\\d+ | 2 | 7 | true | false |
814
| \\A[+-]?\\d+ | 7 | 10 | false | true |
915
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true | true |

python/ql/test/library-tests/regex/Regex.expected

+37
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@
2626
| (?!not-this)^[A-Z_]+$ | qualified | 13 | 20 |
2727
| (?!not-this)^[A-Z_]+$ | sequence | 0 | 21 |
2828
| (?!not-this)^[A-Z_]+$ | sequence | 3 | 11 |
29+
| (?-imsx:a+) | char | 2 | 3 |
30+
| (?-imsx:a+) | char | 3 | 4 |
31+
| (?-imsx:a+) | char | 4 | 5 |
32+
| (?-imsx:a+) | char | 5 | 6 |
33+
| (?-imsx:a+) | char | 6 | 7 |
34+
| (?-imsx:a+) | char | 7 | 8 |
35+
| (?-imsx:a+) | char | 8 | 9 |
36+
| (?-imsx:a+) | qualified | 8 | 10 |
2937
| (?:(?:\n\r?)\|^)( *)\\S | ^ | 11 | 12 |
3038
| (?:(?:\n\r?)\|^)( *)\\S | char | 6 | 7 |
3139
| (?:(?:\n\r?)\|^)( *)\\S | char | 7 | 8 |
@@ -69,18 +77,47 @@
6977
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | sequence | 0 | 26 |
7078
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | sequence | 3 | 7 |
7179
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | sequence | 8 | 9 |
80+
| (?Li)a+ | char | 5 | 6 |
81+
| (?Li)a+ | empty group | 0 | 5 |
82+
| (?Li)a+ | qualified | 5 | 7 |
83+
| (?Li)a+ | sequence | 0 | 7 |
7284
| (?P<name>[\\w]+)\| | char | 10 | 12 |
7385
| (?P<name>[\\w]+)\| | char-set | 9 | 13 |
7486
| (?P<name>[\\w]+)\| | choice | 0 | 16 |
7587
| (?P<name>[\\w]+)\| | non-empty group | 0 | 15 |
7688
| (?P<name>[\\w]+)\| | qualified | 9 | 14 |
7789
| (?P<name>[\\w]+)\| | sequence | 0 | 15 |
90+
| (?a-imsx:a+) | char | 3 | 4 |
91+
| (?a-imsx:a+) | char | 4 | 5 |
92+
| (?a-imsx:a+) | char | 5 | 6 |
93+
| (?a-imsx:a+) | char | 6 | 7 |
94+
| (?a-imsx:a+) | char | 7 | 8 |
95+
| (?a-imsx:a+) | char | 8 | 9 |
96+
| (?a-imsx:a+) | char | 9 | 10 |
97+
| (?a-imsx:a+) | non-empty group | 0 | 12 |
98+
| (?a-imsx:a+) | qualified | 9 | 11 |
99+
| (?a-imsx:a+) | sequence | 0 | 12 |
100+
| (?a-imsx:a+) | sequence | 3 | 11 |
101+
| (?aimsx)a+ | char | 8 | 9 |
102+
| (?aimsx)a+ | empty group | 0 | 8 |
103+
| (?aimsx)a+ | qualified | 8 | 10 |
104+
| (?aimsx)a+ | sequence | 0 | 10 |
105+
| (?aimsx:a+) | char | 7 | 8 |
106+
| (?aimsx:a+) | char | 8 | 9 |
107+
| (?aimsx:a+) | non-empty group | 0 | 11 |
108+
| (?aimsx:a+) | qualified | 8 | 10 |
109+
| (?aimsx:a+) | sequence | 0 | 11 |
110+
| (?aimsx:a+) | sequence | 7 | 10 |
78111
| (?m)^(?!$) | $ | 8 | 9 |
79112
| (?m)^(?!$) | ^ | 4 | 5 |
80113
| (?m)^(?!$) | empty group | 0 | 4 |
81114
| (?m)^(?!$) | empty group | 5 | 10 |
82115
| (?m)^(?!$) | sequence | 0 | 10 |
83116
| (?m)^(?!$) | sequence | 8 | 9 |
117+
| (?ui)a+ | char | 5 | 6 |
118+
| (?ui)a+ | empty group | 0 | 5 |
119+
| (?ui)a+ | qualified | 5 | 7 |
120+
| (?ui)a+ | sequence | 0 | 7 |
84121
| (\\033\|~{) | char | 1 | 5 |
85122
| (\\033\|~{) | char | 6 | 7 |
86123
| (\\033\|~{) | char | 7 | 8 |

python/ql/test/library-tests/regex/test.py

+9
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@
5555
# re.X is an alias for re.VERBOSE
5656
re.compile("", re.X)
5757

58+
#Inline flags; 'a', 'L' and 'u' are mutually exclusive
59+
re.compile("(?aimsx)a+")
60+
re.compile("(?ui)a+")
61+
re.compile(b"(?Li)a+")
62+
#Group with inline flags; TODO: these are not properly parsed and handled yet
63+
re.compile("(?aimsx:a+)")
64+
re.compile("(?-imsx:a+)")
65+
re.compile("(?a-imsx:a+)")
66+
5867
#empty choice
5968
re.compile(r'|x')
6069
re.compile(r'x|')

0 commit comments

Comments
 (0)