Skip to content

Commit 5ad5fe2

Browse files
fixed issue with unidentified char classes
1 parent 0143c27 commit 5ad5fe2

File tree

5 files changed

+74
-34
lines changed

5 files changed

+74
-34
lines changed

assembly/__spec_tests__/generated.spec.ts

+41-17
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
import { RegExp, Match } from "..";
55
import { expectMatch, expectNotMatch, exec } from "../__tests__/utils";
66

7-
xit("line: 1 - requires triage", () => {});
7+
it("line: 1 - matches the quick brown fox against 'the quick brown fox'", () => {
8+
const match = exec("the quick brown fox", "the quick brown fox", "");
9+
expect(match.matches[0]).toBe("the quick brown fox".substring(0, 19));
10+
});
811
it("line: 2 - matches the quick brown fox against 'The quick brown FOX'", () => {
912
expectNotMatch("the quick brown fox", ["The quick brown FOX"]);
1013
});
@@ -514,7 +517,10 @@ it("line: 111 - matches ^[^]cde] against 'dthing'", () => {
514517
it("line: 112 - matches ^[^]cde] against 'ething'", () => {
515518
expectNotMatch("^[^]cde]", ["ething"]);
516519
});
517-
xit("line: 113 - requires triage", () => {});
520+
it("line: 113 - matches ^\\� against '�'", () => {
521+
const match = exec("^\\�", "�", "");
522+
expect(match.matches[0]).toBe("�".substring(0, 1));
523+
});
518524
it("line: 114 - matches ^� against '�'", () => {
519525
const match = exec("^�", "�", "");
520526
expect(match.matches[0]).toBe("�".substring(0, 1));
@@ -1260,7 +1266,7 @@ it("line: 1154 - matches ^abc$ against 'abc'", () => {
12601266
xit("line: 1155 - test cases with CRs not supported yet!", () => {});
12611267
xit("line: 1156 - test cases with CRs not supported yet!", () => {});
12621268
xit("line: 1157 - test cases with CRs not supported yet!", () => {});
1263-
xit("line: 1158 - bug: g should not throw unsupported char class", () => {});
1269+
xit("line: 1158 - requires triage", () => {});
12641270
xit("line: 1159 - test cases with CRs not supported yet!", () => {});
12651271
xit("line: 1160 - test cases with CRs not supported yet!", () => {});
12661272
xit("line: 1161 - test cases with CRs not supported yet!", () => {});
@@ -1305,8 +1311,13 @@ it("line: 1178 - matches \\\x5c against '\\'", () => {
13051311
const match = exec("\\\x5c", "\\", "");
13061312
expect(match.matches[0]).toBe("\\".substring(0, 1));
13071313
});
1308-
xit("line: 1179 - bug: g should not throw unsupported char class", () => {});
1309-
xit("line: 1180 - bug: g should not throw unsupported char class", () => {});
1314+
it("line: 1179 - matches \\\x20Z against 'the Zoo'", () => {
1315+
const match = exec("\\\x20Z", "the Zoo", "");
1316+
expect(match.matches[0]).toBe("the Zoo".substring(3, 5));
1317+
});
1318+
it("line: 1180 - matches \\\x20Z against 'Zulu'", () => {
1319+
expectNotMatch("\\\x20Z", ["Zulu"]);
1320+
});
13101321
xit("line: 1181 - back references are not supported", () => {});
13111322
xit("line: 1182 - back references are not supported", () => {});
13121323
xit("line: 1183 - back references are not supported", () => {});
@@ -1355,7 +1366,10 @@ xit("line: 1218 - back references are not supported", () => {});
13551366
xit("line: 1219 - back references are not supported", () => {});
13561367
xit("line: 1220 - back references are not supported", () => {});
13571368
xit("line: 1221 - non capturing groups not supported", () => {});
1358-
xit("line: 1223 - bug: g should not throw unsupported char class", () => {});
1369+
it("line: 1223 - matches ab\\gdef against 'abgdef'", () => {
1370+
const match = exec("ab\\gdef", "abgdef", "");
1371+
expect(match.matches[0]).toBe("abgdef".substring(0, 6));
1372+
});
13591373
xit("line: 1224 - requires triage", () => {});
13601374
xit("line: 1225 - lazy quantifiers are not supported", () => {});
13611375
xit("line: 1226 - back references are not supported", () => {});
@@ -1364,13 +1378,15 @@ xit("line: 1228 - back references are not supported", () => {});
13641378
xit("line: 1229 - back references are not supported", () => {});
13651379
xit("line: 1230 - back references are not supported", () => {});
13661380
xit("line: 1231 - test cases with CRs not supported yet!", () => {});
1367-
xit("line: 1232 - bug: g should not throw unsupported char class", () => {});
1368-
xit("line: 1233 - bug: g should not throw unsupported char class", () => {});
1369-
xit("line: 1234 - bug: g should not throw unsupported char class", () => {});
1370-
xit("line: 1235 - bug: g should not throw unsupported char class", () => {});
1371-
xit("line: 1236 - bug: g should not throw unsupported char class", () => {});
1381+
xit("line: 1232 - requires triage", () => {});
1382+
xit("line: 1233 - requires triage", () => {});
1383+
xit("line: 1234 - requires triage", () => {});
1384+
xit("line: 1235 - requires triage", () => {});
1385+
it("line: 1236 - matches ^([^a])([^\\\b])([^c]*)([^d]{3,4}) against 'anything'", () => {
1386+
expectNotMatch("^([^a])([^\\\b])([^c]*)([^d]{3,4})", ["anything"]);
1387+
});
13721388
xit("line: 1237 - requires triage", () => {});
1373-
xit("line: 1238 - bug: g should not throw unsupported char class", () => {});
1389+
xit("line: 1238 - requires triage", () => {});
13741390
xit("line: 1239 - requires triage", () => {});
13751391
it("line: 1240 - matches [^a] against 'Abc'", () => {
13761392
const match = exec("[^a]", "Abc", "");
@@ -1418,9 +1434,17 @@ it("line: 1251 - matches [^k]{2,3}$ against 'akb'", () => {
14181434
it("line: 1252 - matches [^k]{2,3}$ against 'akk '", () => {
14191435
expectNotMatch("[^k]{2,3}$", ["akk "]);
14201436
});
1421-
xit("line: 1253 - bug: g should not throw unsupported char class", () => {});
1422-
xit("line: 1254 - bug: g should not throw unsupported char class", () => {});
1423-
xit("line: 1255 - bug: g should not throw unsupported char class", () => {});
1437+
it("line: 1253 - matches ^\\d{8,}\\@.+[^k]$ against '12345678@a.b.c.d'", () => {
1438+
const match = exec("^\\d{8,}\\@.+[^k]$", "12345678@a.b.c.d", "");
1439+
expect(match.matches[0]).toBe("12345678@a.b.c.d".substring(0, 16));
1440+
});
1441+
it("line: 1254 - matches ^\\d{8,}\\@.+[^k]$ against '123456789@x.y.z'", () => {
1442+
const match = exec("^\\d{8,}\\@.+[^k]$", "123456789@x.y.z", "");
1443+
expect(match.matches[0]).toBe("123456789@x.y.z".substring(0, 15));
1444+
});
1445+
it("line: 1255 - matches ^\\d{8,}\\@.+[^k]$ against '12345678@x.y.uk'", () => {
1446+
expectNotMatch("^\\d{8,}\\@.+[^k]$", ["12345678@x.y.uk"]);
1447+
});
14241448
it("line: 1256 - matches ^\\d{8,}\\@.+[^k]$ against '1234567@a.b.c.d '", () => {
14251449
expectNotMatch("^\\d{8,}\\@.+[^k]$", ["1234567@a.b.c.d "]);
14261450
});
@@ -1483,7 +1507,7 @@ xit("line: 1276 - non capturing groups not supported", () => {});
14831507
xit("line: 1277 - non capturing groups not supported", () => {});
14841508
xit("line: 1278 - non capturing groups not supported", () => {});
14851509
xit("line: 1279 - non capturing groups not supported", () => {});
1486-
xit("line: 1280 - bug: g should not throw unsupported char class", () => {});
1510+
xit("line: 1280 - requires triage", () => {});
14871511
it("line: 1281 - matches foo(.*)bar against 'The food is under the bar in the barn.'", () => {
14881512
const match = exec(
14891513
"foo(.*)bar",
@@ -1519,7 +1543,7 @@ it("line: 1287 - matches (.*)(\\d+)$ against 'I have 2 numbers: 53147'", () => {
15191543
expect(match.matches[2]).toBe("I have 2 numbers: 53147".substring(22, 23));
15201544
});
15211545
xit("line: 1288 - lazy quantifiers are not supported", () => {});
1522-
xit("line: 1289 - bug: g should not throw unsupported char class", () => {});
1546+
xit("line: 1289 - requires triage", () => {});
15231547
it("line: 1290 - matches (.*\\D)(\\d+)$ against 'I have 2 numbers: 53147'", () => {
15241548
const match = exec("(.*\\D)(\\d+)$", "I have 2 numbers: 53147", "");
15251549
expect(match.matches[0]).toBe("I have 2 numbers: 53147".substring(0, 23));

assembly/__tests__/character-classes.spec.ts

+4
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,7 @@ it("escaped dot", () => {
5353
expectMatch("\\.", ["."]);
5454
expectNotMatch("\\.", ["", "a"]);
5555
});
56+
57+
it("unrecognised character classes are treated as characters", () => {
58+
expectMatch("\\g\\m", ["gm"]);
59+
});

assembly/parser/parser.ts

+22-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,25 @@ function isCharacterSetSpecialChar(code: Char): bool {
2929
);
3030
}
3131

32+
function isCharacterClass(code: u32): bool {
33+
switch (code) {
34+
case Char.d:
35+
case Char.D:
36+
case Char.Dot:
37+
case Char.w:
38+
case Char.W:
39+
case Char.s:
40+
case Char.S:
41+
case Char.t:
42+
case Char.r:
43+
case Char.n:
44+
case Char.v:
45+
case Char.f:
46+
return true;
47+
}
48+
return false;
49+
}
50+
3251
function isAssertion(code: u32): bool {
3352
return code == Char.Dollar || code == Char.Caret; // "$" or "^"
3453
}
@@ -119,8 +138,10 @@ export class Parser {
119138
return this.parseCharacterCode(Char.x);
120139
} else if (token == Char.u) {
121140
return this.parseCharacterCode(Char.u);
122-
} else {
141+
} else if (isCharacterClass(token)) {
123142
return new CharacterClassNode(this.eatToken());
143+
} else {
144+
return new CharacterNode(this.eatToken());
124145
}
125146
}
126147

spec/test-generator.js

+5-14
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,15 @@ const knownIssues = {
1616
],
1717
"issues with repeated capture groups": [...range(63, 68), 1391, 1392],
1818
"bug that needs filing": [1102],
19-
"bug: \\g should not throw unsupported char class": [
20-
1223,
21-
1179,
22-
1180,
23-
1158,
24-
...range(1232, 1235),
25-
1236,
26-
1238,
27-
...range(1253, 1255),
28-
1280,
29-
1289,
30-
],
3119
"requires triage": [
32-
1,
33-
113,
3420
141,
3521
153,
3622
155,
3723
255,
24+
1158,
25+
...range(1232, 1235),
26+
1280,
27+
1289,
3828
256,
3929
261,
4030
262,
@@ -43,6 +33,7 @@ const knownIssues = {
4333
263,
4434
265,
4535
266,
36+
1238,
4637
...range(289, 291),
4738
1224,
4839
1277,

ts/index.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ globalAny.log = console.log;
55

66
import { RegExp } from "../assembly/regexp";
77

8-
const regexObj = new RegExp("^12.34");
9-
const match = regexObj.exec("12\n34");
8+
const regexObj = new RegExp("\\�");
9+
const match = regexObj.exec("");
1010

1111
console.log(match);

0 commit comments

Comments
 (0)