-
Notifications
You must be signed in to change notification settings - Fork 3.7k
fix(ruby) symbols, string interpolation, class names with underscores #4213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
jimtng
wants to merge
9
commits into
highlightjs:main
Choose a base branch
from
jimtng:ruby-class-underscore
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 2 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
67644b9
fix(ruby) symbols, string interpolation, class names with underscores
jimtng 1ce45f3
remove negative lookbehind
jimtng 0be49c6
reduce relevance of string interpolation
jimtng f53b568
remove relevance
jimtng 094e626
match single-letter class name
jimtng 2017467
match single char constant
jimtng 34ee845
match class inheritance
jimtng 1e751a9
clean up last commas
jimtng eace348
remove the use of lookbehind
jimtng File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,13 +11,7 @@ export default function(hljs) { | |
const regex = hljs.regex; | ||
const RUBY_METHOD_RE = '([a-zA-Z_]\\w*[!?=]?|[-+~]@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?)'; | ||
// TODO: move concepts like CAMEL_CASE into `modes.js` | ||
const CLASS_NAME_RE = regex.either( | ||
/\b([A-Z]+[a-z0-9]+)+/, | ||
// ends in caps | ||
/\b([A-Z]+[a-z0-9]+)+[A-Z]+/, | ||
) | ||
; | ||
const CLASS_NAME_WITH_NAMESPACE_RE = regex.concat(CLASS_NAME_RE, /(::\w+)*/) | ||
const CLASS_NAME_RE = /\b([A-Z]+[a-z0-9_]+)+[A-Z]*/; | ||
// very popular ruby built-ins that one might even assume | ||
// are actual keywords (despite that not being the case) | ||
const PSEUDO_KWS = [ | ||
|
@@ -120,19 +114,16 @@ export default function(hljs) { | |
className: 'subst', | ||
begin: /#\{/, | ||
end: /\}/, | ||
keywords: RUBY_KEYWORDS | ||
keywords: RUBY_KEYWORDS, | ||
relevance: 10 | ||
joshgoebel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
const STRING = { | ||
const STRING_INTERPOLABLE = { | ||
className: 'string', | ||
contains: [ | ||
hljs.BACKSLASH_ESCAPE, | ||
SUBST | ||
], | ||
variants: [ | ||
{ | ||
begin: /'/, | ||
end: /'/ | ||
}, | ||
{ | ||
begin: /"/, | ||
end: /"/ | ||
|
@@ -142,45 +133,45 @@ export default function(hljs) { | |
end: /`/ | ||
}, | ||
{ | ||
begin: /%[qQwWx]?\(/, | ||
joshgoebel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
end: /\)/ | ||
begin: /%[QWx]?\(/, | ||
end: /\)/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?\[/, | ||
end: /\]/ | ||
begin: /%[QWx]?\[/, | ||
end: /\]/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?\{/, | ||
end: /\}/ | ||
begin: /%[QWx]?\{/, | ||
end: /\}/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?</, | ||
end: />/ | ||
begin: /%[QWx]?</, | ||
end: />/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?\//, | ||
end: /\// | ||
begin: /%[QWx]?\//, | ||
end: /\//, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?%/, | ||
end: /%/ | ||
begin: /%[QWx]?%/, | ||
end: /%/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?-/, | ||
end: /-/ | ||
begin: /%[QWx]?-/, | ||
end: /-/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qQwWx]?\|/, | ||
end: /\|/ | ||
begin: /%[QWx]?\|/, | ||
end: /\|/, | ||
relevance: 2 | ||
}, | ||
// in the following expressions, \B in the beginning suppresses recognition of ?-sequences | ||
// where ? is the last character of a preceding identifier, as in: `func?4` | ||
{ begin: /\B\?(\\\d{1,3})/ }, | ||
{ begin: /\B\?(\\x[A-Fa-f0-9]{1,2})/ }, | ||
{ begin: /\B\?(\\u\{?[A-Fa-f0-9]{1,6}\}?)/ }, | ||
{ begin: /\B\?(\\M-\\C-|\\M-\\c|\\c\\M-|\\M-|\\C-\\M-)[\x20-\x7e]/ }, | ||
{ begin: /\B\?\\(c|C-)[\x20-\x7e]/ }, | ||
{ begin: /\B\?\\?\S/ }, | ||
// heredocs | ||
{ | ||
// this guard makes sure that we have an entire heredoc and not a false | ||
|
@@ -202,6 +193,63 @@ export default function(hljs) { | |
} | ||
] | ||
}; | ||
const STRING_NONINTERPOLABLE = { | ||
className: 'string', | ||
variants: [ | ||
{ | ||
begin: /'/, | ||
end: /'/ | ||
}, | ||
{ | ||
begin: /%[qw]?\(/, | ||
end: /\)/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?\[/, | ||
end: /\]/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?\{/, | ||
end: /\}/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?</, | ||
end: />/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?\//, | ||
end: /\//, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?%/, | ||
end: /%/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?-/, | ||
end: /-/, | ||
relevance: 2 | ||
}, | ||
{ | ||
begin: /%[qw]?\|/, | ||
end: /\|/, | ||
relevance: 2 | ||
}, | ||
// in the following expressions, \B in the beginning suppresses recognition of ?-sequences | ||
// where ? is the last character of a preceding identifier, as in: `func?4` | ||
{ begin: /\B\?(\\\d{1,3})/ }, | ||
{ begin: /\B\?(\\x[A-Fa-f0-9]{1,2})/ }, | ||
{ begin: /\B\?(\\u\{?[A-Fa-f0-9]{1,6}\}?)/ }, | ||
{ begin: /\B\?(\\M-\\C-|\\M-\\c|\\c\\M-|\\M-|\\C-\\M-)[\x20-\x7e]/ }, | ||
{ begin: /\B\?\\(c|C-)[\x20-\x7e]/ }, | ||
{ begin: /\B\?\\?\S/ } | ||
] | ||
}; | ||
|
||
// Ruby syntax is underdocumented, but this grammar seems to be accurate | ||
// as of version 2.7.2 (confirmed with (irb and `Ripper.sexp(...)`) | ||
|
@@ -246,7 +294,7 @@ export default function(hljs) { | |
const INCLUDE_EXTEND = { | ||
match: [ | ||
/(include|extend)\s+/, | ||
CLASS_NAME_WITH_NAMESPACE_RE | ||
CLASS_NAME_RE | ||
], | ||
scope: { | ||
2: "title.class" | ||
|
@@ -259,15 +307,15 @@ export default function(hljs) { | |
{ | ||
match: [ | ||
/class\s+/, | ||
CLASS_NAME_WITH_NAMESPACE_RE, | ||
CLASS_NAME_RE, | ||
/\s+<\s+/, | ||
CLASS_NAME_WITH_NAMESPACE_RE | ||
CLASS_NAME_RE | ||
] | ||
}, | ||
{ | ||
match: [ | ||
/\b(class|module)\s+/, | ||
CLASS_NAME_WITH_NAMESPACE_RE | ||
CLASS_NAME_RE | ||
] | ||
} | ||
], | ||
|
@@ -301,7 +349,7 @@ export default function(hljs) { | |
const OBJECT_CREATION = { | ||
relevance: 0, | ||
match: [ | ||
CLASS_NAME_WITH_NAMESPACE_RE, | ||
CLASS_NAME_RE, | ||
/\.new[. (]/ | ||
], | ||
scope: { | ||
|
@@ -317,7 +365,8 @@ export default function(hljs) { | |
}; | ||
|
||
const RUBY_DEFAULT_CONTAINS = [ | ||
STRING, | ||
STRING_INTERPOLABLE, | ||
STRING_NONINTERPOLABLE, | ||
CLASS_DEFINITION, | ||
INCLUDE_EXTEND, | ||
OBJECT_CREATION, | ||
|
@@ -326,7 +375,8 @@ export default function(hljs) { | |
METHOD_DEFINITION, | ||
{ | ||
// swallow namespace qualifiers before symbols | ||
begin: hljs.IDENT_RE + '::' }, | ||
begin: '::' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I think this works. |
||
}, | ||
{ | ||
className: 'symbol', | ||
begin: hljs.UNDERSCORE_IDENT_RE + '(!|\\?)?:', | ||
|
@@ -336,10 +386,17 @@ export default function(hljs) { | |
className: 'symbol', | ||
begin: ':(?!\\s)', | ||
contains: [ | ||
STRING, | ||
{ begin: RUBY_METHOD_RE } | ||
{ begin: /'/, end: /'/ }, | ||
{ | ||
begin: /"/, end: /"/, | ||
contains: [ | ||
hljs.BACKSLASH_ESCAPE, | ||
SUBST | ||
] | ||
}, | ||
{ begin: hljs.UNDERSCORE_IDENT_RE } | ||
], | ||
relevance: 0 | ||
relevance: 1 | ||
}, | ||
NUMBER, | ||
{ | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<span class="hljs-title class_">Class</span> | ||
<span class="hljs-title class_">ClassName</span> | ||
<span class="hljs-title class_">Class_Name</span> | ||
<span class="hljs-title class_">ClassNAME</span> | ||
<span class="hljs-title class_">ClassName</span>::<span class="hljs-title class_">With</span>::<span class="hljs-title class_">Namespace</span> | ||
<span class="hljs-title class_">ClassName</span>::<span class="hljs-title class_">With</span>.method | ||
::<span class="hljs-title class_">TopLevel</span>::<span class="hljs-title class_">Class</span> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Class | ||
ClassName | ||
Class_Name | ||
ClassNAME | ||
ClassName::With::Namespace | ||
ClassName::With.method | ||
::TopLevel::Class |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<span class="hljs-symbol">:symbol</span> | ||
<span class="hljs-symbol">:Symbol</span> | ||
<span class="hljs-symbol">:_leading</span> | ||
<span class="hljs-symbol">:trailing_</span> | ||
<span class="hljs-symbol">:contains_underscore</span> | ||
<span class="hljs-symbol">:symbol_CAPS</span> | ||
<span class="hljs-symbol">:"string symbol"</span> | ||
<span class="hljs-symbol">:"interpolated <span class="hljs-subst">#{test}</span>"</span> | ||
<span class="hljs-symbol">:'string symbol'</span> | ||
<span class="hljs-symbol">:'not interpolated #{test}'</span> | ||
method <span class="hljs-symbol">:symbol</span> | ||
method(<span class="hljs-symbol">:symbol</span>) | ||
method(&<span class="hljs-symbol">:symbol</span>) | ||
assign=<span class="hljs-symbol">:symbol</span> | ||
assign = <span class="hljs-symbol">:symbol</span> | ||
<span class="hljs-symbol">:symbol</span>, others | ||
<span class="hljs-symbol">:</span>1notasymbol | ||
<span class="hljs-symbol">:</span><span class="hljs-string">%q[notasymbol]</span> | ||
|
||
::notsymbol | ||
|
||
<span class="hljs-symbol">hash_symbol:</span> value |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
:symbol | ||
:Symbol | ||
:_leading | ||
:trailing_ | ||
:contains_underscore | ||
:symbol_CAPS | ||
:"string symbol" | ||
:"interpolated #{test}" | ||
:'string symbol' | ||
:'not interpolated #{test}' | ||
method :symbol | ||
method(:symbol) | ||
method(&:symbol) | ||
assign=:symbol | ||
assign = :symbol | ||
:symbol, others | ||
:1notasymbol | ||
:%q[notasymbol] | ||
|
||
::notsymbol | ||
|
||
hash_symbol: value |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't this removal break (at least) the inheritance highlighting?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you give an example syntax of what you mean?
will work fine and better, because it doesn't highlight the
::
much like it doesn't here in github, or vscode.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have special rules to match syntax where we KNOW an identifier is a class (or class with namespaces).
Striving for exact matches against other highlighting tools isn't a thing we care about here.
In this case I'm sympathetic to not highlighting the
::
, but only if it can be done without adding a lot of complexity or advanced parsing to the grammar.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You really want something like
[class](::[class])*
but our multi-match engine can't really handle that - with discrete coloring of the different pieces. Maybe if you tried a long sequence with some items that matched 0 length, but I'm not sure I've tested multi-matching in that type of scenario before.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This case would've worked fine, except it didn't recognise single-letter classes. I've adjusted it to do so.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use "show the structure". as you develop. If you're matching all those "one off" with just your class rule then you're missing out on the fact that the portion after the
<
is not just atitle.class
, it's atitle.class.inherited
.This is why the simple multi-match rules exist - to flesh out more detail as well as handle cases like
class A
where we KNOW A is a class here, not a constant (as it would be with non other context).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't aware of the different handling for
title.class.inherited
.Whilst special casing
class A
works using a multi-match rule, it wouldn't help forA.method(foo)
which will still be matched as a constant. So I'd suggest we simplify and not handle theclass A
case at all.In the current version, it doesn't work either: https://highlightjs.org/demo#lang=ruby&v=1&theme=atom-one-dark&code=Y2xhc3MgQQplbmQKCscNOjpCywsgPCBDOjpExxJGb286OkZvbyA8yQvKGsYVyEjEC8VXQSA9ICdYJw%3D%3D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the latest commit here, it looks like this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.