Skip to content

Commit b45e211

Browse files
authored
fix(parser) complete fix for resuming matches from same index (#2678)
1 parent 1d1ddcd commit b45e211

File tree

7 files changed

+80
-29
lines changed

7 files changed

+80
-29
lines changed

package-lock.json

+3-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
"test": "mocha test",
3333
"test-markup": "mocha test/markup",
3434
"test-detect": "mocha test/detect",
35-
"test-browser": "mocha test/browser"
35+
"test-browser": "mocha test/browser",
36+
"test-parser": "mocha test/parser"
3637
},
3738
"engines": {
3839
"node": "*"

src/highlight.js

+1-8
Original file line numberDiff line numberDiff line change
@@ -498,20 +498,13 @@ const HLJS = function(hljs) {
498498
// considered for a potential match
499499
resumeScanAtSamePosition = false;
500500
} else {
501-
top.matcher.lastIndex = index;
502501
top.matcher.considerAll();
503502
}
503+
top.matcher.lastIndex = index;
504504

505505
const match = top.matcher.exec(codeToHighlight);
506506
// console.log("match", match[0], match.rule && match.rule.begin)
507507

508-
// if our failure to match was the result of a "resumed scan" then we
509-
// need to advance one position and revert to full scanning before we
510-
// decide there are truly no more matches at all to be had
511-
if (!match && top.matcher.resumingScanAtSamePosition()) {
512-
advanceOne();
513-
continue;
514-
}
515508
if (!match) break;
516509

517510
const beforeMatch = codeToHighlight.substring(index, match.index);

src/lib/mode_compiler.js

+48-5
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ export function compileLanguage(language) {
143143
}
144144

145145
resumingScanAtSamePosition() {
146-
return this.regexIndex != 0;
146+
return this.regexIndex !== 0;
147147
}
148148

149149
considerAll() {
@@ -160,15 +160,58 @@ export function compileLanguage(language) {
160160
exec(s) {
161161
const m = this.getMatcher(this.regexIndex);
162162
m.lastIndex = this.lastIndex;
163-
const result = m.exec(s);
163+
let result = m.exec(s);
164+
165+
// The following is because we have no easy way to say "resume scanning at the
166+
// existing position but also skip the current rule ONLY". What happens is
167+
// all prior rules are also skipped which can result in matching the wrong
168+
// thing. Example of matching "booger":
169+
170+
// our matcher is [string, "booger", number]
171+
//
172+
// ....booger....
173+
174+
// if "booger" is ignored then we'd really need a regex to scan from the
175+
// SAME position for only: [string, number] but ignoring "booger" (if it
176+
// was the first match), a simple resume would scan ahead who knows how
177+
// far looking only for "number", ignoring potential string matches (or
178+
// future "booger" matches that might be valid.)
179+
180+
// So what we do: We execute two matchers, one resuming at the same
181+
// position, but the second full matcher starting at the position after:
182+
183+
// /--- resume first regex match here (for [number])
184+
// |/---- full match here for [string, "booger", number]
185+
// vv
186+
// ....booger....
187+
188+
// Which ever results in a match first is then used. So this 3-4 step
189+
// process essentially allows us to say "match at this position, excluding
190+
// a prior rule that was ignored".
191+
//
192+
// 1. Match "booger" first, ignore. Also proves that [string] does non match.
193+
// 2. Resume matching for [number]
194+
// 3. Match at index + 1 for [string, "booger", number]
195+
// 4. If #2 and #3 result in matches, which came first?
196+
if (this.resumingScanAtSamePosition()) {
197+
if (result && result.index === this.lastIndex) {
198+
// result is position +0 and therefore a valid
199+
// "resume" match so result stays result
200+
} else { // use the second matcher result
201+
const m2 = this.getMatcher(0);
202+
m2.lastIndex = this.lastIndex + 1;
203+
result = m2.exec(s);
204+
}
205+
}
206+
164207
if (result) {
165208
this.regexIndex += result.position + 1;
166-
if (this.regexIndex === this.count) { // wrap-around
167-
this.regexIndex = 0;
209+
if (this.regexIndex === this.count) {
210+
// wrap-around to considering all matches again
211+
this.considerAll();
168212
}
169213
}
170214

171-
// this.regexIndex = 0;
172215
return result;
173216
}
174217
}

test/index.js

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
'use strict';
22

3-
const hljs = require('../build');
3+
const hljs = require('../build');
44
hljs.debugMode(); // tests run in debug mode so errors are raised
55

66
// Tests specific to the API exposed inside the hljs object.
77
// Right now, that only includes tests for several common regular expressions.
88
require('./api');
99

1010
// Test weird bugs we've fixed over time
11-
require("./parser")
11+
require("./parser");
1212

1313
// Tests for auto detection of languages via `highlightAuto`.
1414
require('./detect');
@@ -26,4 +26,3 @@ require('./markup');
2626
// isn't actually used to test inside a browser but `jsdom` acts as a virtual
2727
// browser inside of node.js and runs together with all the other tests.
2828
require('./special');
29-

test/parser/index.js

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
'use strict';
22

3+
const should = require('should');
4+
35
describe('hljs', function() {
46
require('./look-ahead-end-matchers');
57
require('./resume-scan');

test/parser/resume-scan.js

+22-9
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
1-
const hljs = require('../../build');
1+
'use strict';
22

3-
describe("bugs", function () {
3+
const hljs = require('../../build');
4+
hljs.debugMode(); // tests run in debug mode so errors are raised
45

6+
describe("bugs", function() {
57
describe("resume scan when a match is ignored", () => {
68
it("should continue to highlight later matches", () => {
7-
hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")').value
8-
.should.equal(
9-
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>)'
10-
)
11-
})
12-
})
13-
})
9+
const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")');
10+
result.value.should.equal(
11+
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>)'
12+
);
13+
});
14+
// previously the match rule was resumed but it would scan ahead too far and ignore
15+
// later matches that matched the PRIOR rules... this is because when we "skip" (ignore) a
16+
// rule we really only want to skip searching for THAT rule at that same location, we
17+
// do not want to stop searching for ALL the prior rules at that location...
18+
it("BUT should not skip ahead too far", () => {
19+
const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar");\n23');
20+
result.value.should.equal(
21+
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>);\n' +
22+
'<span class="hljs-number">23</span>'
23+
);
24+
});
25+
});
26+
});

0 commit comments

Comments
 (0)