Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit bcfa9fd

Browse files
authored
Highlighting: add tree-sitter support for the most common programming languages (#47571)
Previously, we only supported tree-sitter syntax highlighting for C# and Scala. This PR adds tree-sitter support for more languages (JS, TS, C, C++, Java, Ruby, Rust, and Python), which should both improve performance and produce higher quality highlighting. Down the road, we can start emitting symbol data alongside highlighting to unblock other features like rendering stable symbol-based URLs for "Copy link". Follow-up items: - [ ] Update highlights.scm to only use SCIP SyntaxKind names. For example, we should replace all `@variable` with `@identifier`. - [ ] Update SCIP `SyntaxKind` to include: `IdentifierField`, `IdentifierMacro`. See added TODOs in highlights.scm for a few other cases where our current kinds may not be good enough. - [ ] Start emitting `IdentifierLocal` based on `locals.scm` queries - [ ] Update SCIP to rename `IdentifierKeyword` into `Keyword`. ## Test plan See updated snapshot tests. <!-- All pull requests REQUIRE a test plan: https://docs.sourcegraph.com/dev/background-information/testing_principles -->
1 parent ec80653 commit bcfa9fd

31 files changed

+8717
-25
lines changed

cmd/frontend/internal/highlight/language.go

+28-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ const (
2121
EngineSyntect
2222
)
2323

24+
// String returns the human-readable name of the highlighting engine:
// "tree-sitter" for EngineTreeSitter, "syntect" for EngineSyntect, and
// "invalid" for any other value.
func (e EngineType) String() string {
25+
switch e {
26+
case EngineTreeSitter:
27+
return "tree-sitter"
28+
case EngineSyntect:
29+
return "syntect"
30+
default:
31+
return "invalid"
32+
}
33+
}
34+
2435
// Converts an engine type to the corresponding parameter value for the syntax
2536
// highlighting request. Defaults to "syntect".
2637
func getEngineParameter(engine EngineType) string {
@@ -59,6 +70,8 @@ var highlightConfig = syntaxHighlightConfig{
5970
}
6071
var baseHighlightConfig = syntaxHighlightConfig{
6172
Extensions: map[string]string{
73+
"jsx": "jsx", // default `getLanguage()` helper doesn't handle JSX
74+
"tsx": "tsx", // default `getLanguage()` helper doesn't handle TSX
6275
"sbt": "scala",
6376
"sc": "scala",
6477
"xlsg": "xlsg",
@@ -91,10 +104,21 @@ var engineConfig = syntaxEngineConfig{
91104
var baseEngineConfig = syntaxEngineConfig{
92105
Default: EngineSyntect,
93106
Overrides: map[string]EngineType{
94-
"scala": EngineTreeSitter,
95-
"c#": EngineTreeSitter,
96-
"jsonnet": EngineTreeSitter,
97-
"xlsg": EngineTreeSitter,
107+
"javascript": EngineTreeSitter,
108+
"jsx": EngineTreeSitter,
109+
"typescript": EngineTreeSitter,
110+
"tsx": EngineTreeSitter,
111+
"python": EngineTreeSitter,
112+
"java": EngineTreeSitter,
113+
"c": EngineTreeSitter,
114+
"cpp": EngineTreeSitter,
115+
"c++": EngineTreeSitter,
116+
"scala": EngineTreeSitter,
117+
"rust": EngineTreeSitter,
118+
"ruby": EngineTreeSitter,
119+
"c#": EngineTreeSitter,
120+
"jsonnet": EngineTreeSitter,
121+
"xlsg": EngineTreeSitter,
98122
},
99123
}
100124

docker-images/syntax-highlighter/Cargo.lock

+55
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docker-images/syntax-highlighter/crates/sg-syntax/Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ tree-sitter-scala = { git = "https://github.com/sourcegraph/tree-sitter-scala" }
4949

5050
tree-sitter-xlsg = { git = "https://github.com/sourcegraph/tree-sitter-xlsg" }
5151

52+
tree-sitter-c = "0.20.2"
53+
tree-sitter-java = "0.20.0"
54+
tree-sitter-javascript = "0.20.0"
55+
tree-sitter-rust = "0.20.3"
56+
tree-sitter-typescript = "0.20.2"
57+
5258
[dev-dependencies]
5359
insta = "1.11.0"
5460
pretty_assertions = "1.2.1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"--" @identifier.operator
2+
"-" @identifier.operator
3+
"-=" @identifier.operator
4+
"->" @identifier.operator
5+
"=" @identifier.operator
6+
"!=" @identifier.operator
7+
"*" @identifier.operator
8+
"&" @identifier.operator
9+
"&&" @identifier.operator
10+
"+" @identifier.operator
11+
"++" @identifier.operator
12+
"+=" @identifier.operator
13+
"<" @identifier.operator
14+
"==" @identifier.operator
15+
">" @identifier.operator
16+
"||" @identifier.operator
17+
"!" @identifier.operator
18+
19+
; "." @delimiter
20+
; ";" @delimiter
21+
22+
(string_literal) @string
23+
(system_lib_string) @string
24+
25+
(null) @constant.null
26+
(number_literal) @number
27+
(char_literal) @character
28+
(true) @boolean
29+
(false) @boolean
30+
31+
(call_expression
32+
function: (identifier) @identifier.function)
33+
(call_expression
34+
function: (field_expression
35+
field: (field_identifier) @identifier.function))
36+
(function_declarator
37+
declarator: (identifier) @identifier.function)
38+
(preproc_function_def
39+
name: (identifier) @identifier.function)
40+
41+
(field_identifier) @identifier ;; TODO: something better
42+
(statement_identifier) @identifier
43+
(type_identifier) @type
44+
(primitive_type) @type.builtin
45+
(sized_type_specifier) @type
46+
47+
((identifier) @constant
48+
(#match? @constant "^[A-Z][A-Z\\d_]*$"))
49+
50+
(identifier) @identifier
51+
52+
(comment) @comment
53+
54+
"break" @keyword
55+
"case" @keyword
56+
"const" @keyword
57+
"continue" @keyword
58+
"default" @keyword
59+
"do" @keyword
60+
"else" @keyword
61+
"enum" @keyword
62+
"extern" @keyword
63+
"for" @keyword
64+
"if" @keyword
65+
"inline" @keyword
66+
"return" @keyword
67+
"sizeof" @keyword
68+
"static" @keyword
69+
"struct" @keyword
70+
"switch" @keyword
71+
"typedef" @keyword
72+
"union" @keyword
73+
"volatile" @keyword
74+
"while" @keyword
75+
76+
"#define" @keyword
77+
"#elif" @keyword
78+
"#else" @keyword
79+
"#endif" @keyword
80+
"#if" @keyword
81+
"#ifdef" @keyword
82+
"#ifndef" @keyword
83+
"#include" @keyword
84+
(preproc_directive) @keyword
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
; Methods
2+
3+
(method_declaration
4+
name: (identifier) @identifier.function)
5+
(method_invocation
6+
name: (identifier) @identifier.function)
7+
(super) @identifier.builtin
8+
9+
; Annotations
10+
11+
(annotation
12+
name: (identifier) @identifier.attribute)
13+
(marker_annotation
14+
name: (identifier) @identifier.attribute)
15+
16+
"@" @operator
17+
18+
; Types
19+
20+
(type_identifier) @identifier.type
21+
22+
(interface_declaration
23+
name: (identifier) @identifier.type)
24+
(class_declaration
25+
name: (identifier) @identifier.type)
26+
(enum_declaration
27+
name: (identifier) @identifier.type)
28+
29+
((field_access
30+
object: (identifier) @identifier.type)
31+
(#match? @identifier.type "^[A-Z]"))
32+
((scoped_identifier
33+
scope: (identifier) @identifier.type)
34+
(#match? @identifier.type "^[A-Z]"))
35+
((method_invocation
36+
object: (identifier) @identifier.type)
37+
(#match? @identifier.type "^[A-Z]"))
38+
((method_reference
39+
. (identifier) @identifier.type)
40+
(#match? @identifier.type "^[A-Z]"))
41+
42+
(constructor_declaration
43+
name: (identifier) @identifier.type)
44+
45+
[
46+
(boolean_type)
47+
(integral_type)
48+
(floating_point_type)
49+
(floating_point_type)
50+
(void_type)
51+
] @identifier.builtin
52+
53+
; Variables
54+
55+
((identifier) @constant
56+
(#match? @constant "^_*[A-Z][A-Z\\d_]+$"))
57+
58+
(identifier) @identifier
59+
60+
(this) @identifier.builtin
61+
62+
; Literals
63+
64+
[
65+
(hex_integer_literal)
66+
(decimal_integer_literal)
67+
(octal_integer_literal)
68+
(decimal_floating_point_literal)
69+
(hex_floating_point_literal)
70+
] @number
71+
72+
[
73+
(character_literal)
74+
(string_literal)
75+
(text_block)
76+
] @string
77+
78+
[
79+
(true)
80+
(false)
81+
] @boolean
82+
83+
(null_literal) @constant.null
84+
85+
[
86+
(line_comment)
87+
(block_comment)
88+
] @comment
89+
90+
; Keywords
91+
92+
[
93+
"abstract"
94+
"assert"
95+
"break"
96+
"case"
97+
"catch"
98+
"class"
99+
"record"
100+
"continue"
101+
"default"
102+
"do"
103+
"else"
104+
"enum"
105+
"exports"
106+
"extends"
107+
"final"
108+
"finally"
109+
"for"
110+
"if"
111+
"implements"
112+
"import"
113+
"instanceof"
114+
"interface"
115+
"module"
116+
"native"
117+
"new"
118+
"non-sealed"
119+
"open"
120+
"opens"
121+
"package"
122+
"private"
123+
"protected"
124+
"provides"
125+
"public"
126+
"requires"
127+
"return"
128+
"sealed"
129+
"static"
130+
"strictfp"
131+
"switch"
132+
"synchronized"
133+
"throw"
134+
"throws"
135+
"to"
136+
"transient"
137+
"transitive"
138+
"try"
139+
"uses"
140+
"volatile"
141+
"while"
142+
"with"
143+
] @keyword

0 commit comments

Comments
 (0)