Skip to content

fix(parser) complete fix for resuming matches from same index #2678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
"test": "mocha test",
"test-markup": "mocha test/markup",
"test-detect": "mocha test/detect",
"test-browser": "mocha test/browser"
"test-browser": "mocha test/browser",
"test-parser": "mocha test/parser"
},
"engines": {
"node": "*"
Expand Down
9 changes: 1 addition & 8 deletions src/highlight.js
Original file line number Diff line number Diff line change
Expand Up @@ -498,20 +498,13 @@ const HLJS = function(hljs) {
// considered for a potential match
resumeScanAtSamePosition = false;
} else {
top.matcher.lastIndex = index;
top.matcher.considerAll();
}
top.matcher.lastIndex = index;

const match = top.matcher.exec(codeToHighlight);
// console.log("match", match[0], match.rule && match.rule.begin)

// if our failure to match was the result of a "resumed scan" then we
// need to advance one position and revert to full scanning before we
// decide there are truly no more matches at all to be had
if (!match && top.matcher.resumingScanAtSamePosition()) {
advanceOne();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

advanceOne() is no longer needed.
Otherwise it's all looks good!

continue;
}
if (!match) break;

const beforeMatch = codeToHighlight.substring(index, match.index);
Expand Down
53 changes: 48 additions & 5 deletions src/lib/mode_compiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ export function compileLanguage(language) {
}

resumingScanAtSamePosition() {
return this.regexIndex != 0;
return this.regexIndex !== 0;
}

considerAll() {
Expand All @@ -160,15 +160,58 @@ export function compileLanguage(language) {
exec(s) {
const m = this.getMatcher(this.regexIndex);
m.lastIndex = this.lastIndex;
const result = m.exec(s);
let result = m.exec(s);

// The following is because we have no easy way to say "resume scanning at the
// existing position but also skip the current rule ONLY". What happens is
// all prior rules are also skipped which can result in matching the wrong
// thing. Example of matching "booger":

// our matcher is [string, "booger", number]
//
// ....booger....

// if "booger" is ignored then we'd really need a regex to scan from the
// SAME position for only: [string, number] but ignoring "booger" (if it
// was the first match), a simple resume would scan ahead who knows how
// far looking only for "number", ignoring potential string matches (or
// future "booger" matches that might be valid.)

// So what we do: We execute two matchers, one resuming at the same
// position, but the second full matcher starting at the position after:

// /--- resume first regex match here (for [number])
// |/---- full match here for [string, "booger", number]
// vv
// ....booger....

// Which ever results in a match first is then used. So this 3-4 step
// process essentially allows us to say "match at this position, excluding
// a prior rule that was ignored".
//
// 1. Match "booger" first, ignore. Also proves that [string] does non match.
// 2. Resume matching for [number]
// 3. Match at index + 1 for [string, "booger", number]
// 4. If #2 and #3 result in matches, which came first?
if (this.resumingScanAtSamePosition()) {
if (result && result.index === this.lastIndex) {
// result is position +0 and therefore a valid
// "resume" match so result stays result
} else { // use the second matcher result
const m2 = this.getMatcher(0);
m2.lastIndex = this.lastIndex + 1;
result = m2.exec(s);
}
}

if (result) {
this.regexIndex += result.position + 1;
if (this.regexIndex === this.count) { // wrap-around
this.regexIndex = 0;
if (this.regexIndex === this.count) {
// wrap-around to considering all matches again
this.considerAll();
}
}

// this.regexIndex = 0;
return result;
}
}
Expand Down
5 changes: 2 additions & 3 deletions test/index.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
'use strict';

const hljs = require('../build');
const hljs = require('../build');
hljs.debugMode(); // tests run in debug mode so errors are raised

// Tests specific to the API exposed inside the hljs object.
// Right now, that only includes tests for several common regular expressions.
require('./api');

// Test weird bugs we've fixed over time
require("./parser")
require("./parser");

// Tests for auto detection of languages via `highlightAuto`.
require('./detect');
Expand All @@ -26,4 +26,3 @@ require('./markup');
// isn't actually used to test inside a browser but `jsdom` acts as a virtual
// browser inside of node.js and runs together with all the other tests.
require('./special');

2 changes: 2 additions & 0 deletions test/parser/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict';

const should = require('should');

describe('hljs', function() {
require('./look-ahead-end-matchers');
require('./resume-scan');
Expand Down
31 changes: 22 additions & 9 deletions test/parser/resume-scan.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
const hljs = require('../../build');
'use strict';

describe("bugs", function () {
const hljs = require('../../build');
hljs.debugMode(); // tests run in debug mode so errors are raised

describe("bugs", function() {
describe("resume scan when a match is ignored", () => {
it("should continue to highlight later matches", () => {
hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")').value
.should.equal(
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>)'
)
})
})
})
const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")');
result.value.should.equal(
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>)'
);
});
// previously the match rule was resumed but it would scan ahead too far and ignore
// later matches that matched the PRIOR rules... this is because when we "skip" (ignore) a
// rule we really only want to skip searching for THAT rule at that same location, we
// do not want to stop searching for ALL the prior rules at that location...
it("BUT should not skip ahead too far", () => {
const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar");\n23');
result.value.should.equal(
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>);\n' +
'<span class="hljs-number">23</span>'
);
});
});
});