Skip to content

Commit cda973c

Browse files
authored
enh(R) major overhaul for the R language grammar (#2680)
- fixes the idiosyncratic R rule for identifiers
- fixes the list of keywords, which was previously just an ad-hoc list of some true keywords and some (but not all) functions from the base package
- adds a category of builtin functions
- adds a syntax rule to match R’s custom %…% infix operators
- adds support for ‘roxygen’ doc comments
- adds R 4.0 raw string literal (r"(…)") support
- fixes some inaccuracies and missing features in the number literals, such as hexadecimal binary exponents (e.g. 0x1.ap+5 == 52).
1 parent b45e211 commit cda973c

13 files changed

+500
-108
lines changed

Diff for: CHANGES.md

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
Language Improvements:
44

5+
- enh(r) major overhaul of the R language grammar (and fix a few bugs) (#2680) [Konrad Rudolph][]
56
- enh(csharp) Add all C# 9 keywords, and other missing keywords (#2679) [David Pine][]
67
- enh(objectivec) Add `objective-c++` and `obj-c++` aliases for Objective-C [Josh Goebel][]
78
- enh(java) Add support for `record` (#2685) [Josh Goebel][]
@@ -11,6 +12,7 @@ Language Improvements:
1112
[David Pine]: https://github.com/IEvangelist
1213
[Josh Goebel]: https://github.com/yyyc514
1314
[Ryan Jonasson]: https://github.com/ryanjonasson
15+
[Konrad Rudolph]: https://github.com/klmr
1416

1517
## Version 10.2.0
1618

Diff for: src/languages/r.js

+125-46
Original file line numberDiff line numberDiff line change
@@ -2,75 +2,154 @@
22
Language: R
33
Description: R is a free software environment for statistical computing and graphics.
44
Author: Joe Cheng <[email protected]>
5+
Contributors: Konrad Rudolph <[email protected]>
56
Website: https://www.r-project.org
67
Category: scientific
78
*/
89

910
export default function(hljs) {
10-
var IDENT_RE = '([a-zA-Z]|\\.[a-zA-Z.])[a-zA-Z0-9._]*';
11+
// Identifiers in R cannot start with `_`, but they can start with `.` if it
12+
// is not immediately followed by a digit.
13+
// R also supports quoted identifiers, which are near-arbitrary sequences
14+
// delimited by backticks (`…`), which may contain escape sequences. These are
15+
// handled in a separate mode. See `test/markup/r/names.txt` for examples.
16+
// FIXME: Support Unicode identifiers.
17+
const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
1118

1219
return {
1320
name: 'R',
21+
22+
keywords: {
23+
$pattern: IDENT_RE,
24+
keyword:
25+
'function if in break next repeat else for while',
26+
literal:
27+
'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 ' +
28+
'NA_character_|10 NA_complex_|10',
29+
built_in:
30+
// Builtin constants
31+
'LETTERS letters month.abb month.name pi T F ' +
32+
// Primitive functions
33+
// These are all the functions in `base` that are implemented as a
34+
// `.Primitive`, minus those functions that are also keywords.
35+
'abs acos acosh all any anyNA Arg as.call as.character' +
36+
'as.complex as.double as.environment as.integer as.logical' +
37+
'as.null.default as.numeric as.raw asin asinh atan atanh attr' +
38+
'attributes baseenv browser c call ceiling class Conj cos cosh' +
39+
'cospi cummax cummin cumprod cumsum digamma dim dimnames' +
40+
'emptyenv exp expression floor forceAndCall gamma gc.time' +
41+
'globalenv Im interactive invisible is.array is.atomic is.call' +
42+
'is.character is.complex is.double is.environment is.expression' +
43+
'is.finite is.function is.infinite is.integer is.language' +
44+
'is.list is.logical is.matrix is.na is.name is.nan is.null' +
45+
'is.numeric is.object is.pairlist is.raw is.recursive is.single' +
46+
'is.symbol lazyLoadDBfetch length lgamma list log max min' +
47+
'missing Mod names nargs nzchar oldClass on.exit pos.to.env' +
48+
'proc.time prod quote range Re rep retracemem return round' +
49+
'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt' +
50+
'standardGeneric substitute sum switch tan tanh tanpi tracemem' +
51+
'trigamma trunc unclass untracemem UseMethod xtfrm',
52+
},
53+
1454
contains: [
55+
// Roxygen comments
56+
hljs.COMMENT(
57+
/#'/,
58+
/$/,
59+
{
60+
contains: [
61+
{
62+
// Handle `@examples` separately to cause all subsequent code
63+
// until the next `@`-tag on its own line to be kept as-is,
64+
// preventing highlighting. This code is example R code, so nested
65+
// doctags shouldn’t be treated as such. See
66+
// `test/markup/r/roxygen.txt` for an example.
67+
className: 'doctag',
68+
begin: '@examples',
69+
starts: {
70+
contains: [
71+
{ begin: /\n/ },
72+
{
73+
begin: /#'\s*(?=@[a-zA-Z]+)/,
74+
endsParent: true,
75+
},
76+
{
77+
begin: /#'/,
78+
end: /$/,
79+
excludeBegin: true,
80+
}
81+
]
82+
}
83+
},
84+
{
85+
// Handle `@param` to highlight the parameter name following
86+
// after.
87+
className: 'doctag',
88+
begin: '@param',
89+
end: /$/,
90+
contains: [
91+
{
92+
className: 'variable',
93+
variants: [
94+
{ begin: IDENT_RE },
95+
{ begin: /`(?:\\.|[^`])+`/ }
96+
],
97+
endsParent: true
98+
}
99+
]
100+
},
101+
{
102+
className: 'doctag',
103+
begin: /@[a-zA-Z]+/
104+
},
105+
{
106+
className: 'meta-keyword',
107+
begin: /\\[a-zA-Z]+/,
108+
}
109+
]
110+
}
111+
),
112+
15113
hljs.HASH_COMMENT_MODE,
114+
16115
{
17-
begin: IDENT_RE,
18-
keywords: {
19-
$pattern: IDENT_RE,
20-
keyword:
21-
'function if in break next repeat else for return switch while try tryCatch ' +
22-
'stop warning require library attach detach source setMethod setGeneric ' +
23-
'setGroupGeneric setClass ...',
24-
literal:
25-
'NULL NA TRUE FALSE T F Inf NaN NA_integer_|10 NA_real_|10 NA_character_|10 ' +
26-
'NA_complex_|10'
27-
},
28-
relevance: 0
29-
},
30-
{
31-
// hex value
32-
className: 'number',
33-
begin: "0[xX][0-9a-fA-F]+[Li]?\\b",
34-
relevance: 0
35-
},
36-
{
37-
// explicit integer
38-
className: 'number',
39-
begin: "\\d+(?:[eE][+\\-]?\\d*)?L\\b",
40-
relevance: 0
41-
},
42-
{
43-
// number with trailing decimal
44-
className: 'number',
45-
begin: "\\d+\\.(?!\\d)(?:i\\b)?",
46-
relevance: 0
116+
className: 'string',
117+
contains: [hljs.BACKSLASH_ESCAPE],
118+
variants: [
119+
hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
120+
hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\{/, end: /\}(-*)"/ }),
121+
hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\[/, end: /\](-*)"/ }),
122+
hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\(/, end: /\)(-*)'/ }),
123+
hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\{/, end: /\}(-*)'/ }),
124+
hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\[/, end: /\](-*)'/ }),
125+
{begin: '"', end: '"', relevance: 0},
126+
{begin: "'", end: "'", relevance: 0}
127+
],
47128
},
129+
48130
{
49-
// number
50131
className: 'number',
51-
begin: "\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",
132+
variants: [
133+
// Special case: only hexadecimal binary powers can contain fractions.
134+
{ begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
135+
{ begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
136+
{ begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
137+
],
52138
relevance: 0
53139
},
140+
54141
{
55-
// number with leading decimal
56-
className: 'number',
57-
begin: "\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",
58-
relevance: 0
142+
// infix operator
143+
begin: '%',
144+
end: '%'
59145
},
60146

61147
{
62148
// escaped identifier
63149
begin: '`',
64150
end: '`',
65-
relevance: 0
66-
},
67-
68-
{
69-
className: 'string',
70-
contains: [hljs.BACKSLASH_ESCAPE],
71-
variants: [
72-
{begin: '"', end: '"'},
73-
{begin: "'", end: "'"}
151+
contains: [
152+
{ begin: /\\./ }
74153
]
75154
}
76155
]

Diff for: test/detect/r/default.txt

+39-62
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,46 @@
1-
library(ggplot2)
2-
3-
centre <- function(x, type, ...) {
1+
require(stats)
2+
3+
#' Compute different averages
4+
#'
5+
#' @param x \code{numeric} vector of sample data
6+
#' @param type \code{character} vector of length 1 specifying the average type
7+
#' @return \code{centre} returns the sample average according to the chosen method.
8+
#' @examples
9+
#' centre(rcauchy(10), "mean")
10+
#' @export
11+
centre <- function(x, type) {
412
switch(type,
513
mean = mean(x),
614
median = median(x),
715
trimmed = mean(x, trim = .1))
816
}
17+
x <- rcauchy(10)
18+
centre(x, "mean")
919

10-
myVar1
11-
myVar.2
12-
data$x
13-
foo "bar" baz
14-
# test "test"
15-
"test # test"
16-
17-
(123) (1) (10) (0.1) (.2) (1e-7)
18-
(1.2e+7) (2e) (3e+10) (0x0) (0xa)
19-
(0xabcdef1234567890) (123L) (1L)
20-
(0x10L) (10000000L) (1e6L) (1.1L)
21-
(1e-3L) (4123.381E-10i)
22-
(3.) (3.E10) # BUG: .E10 should be part of number
23-
24-
# Numbers in some different contexts
25-
1L
26-
0x40
27-
.234
28-
3.
29-
1L + 30
30-
plot(cars, xlim=20)
31-
plot(cars, xlim=0x20)
32-
foo<-30
33-
my.data.3 <- read() # not a number
34-
c(1,2,3)
35-
1%%2
36-
37-
"this is a quote that spans
38-
multiple lines
39-
\"
40-
41-
is this still a quote? it should be.
42-
# even still!
43-
44-
" # now we're done.
45-
46-
'same for
47-
single quotes #'
48-
49-
# keywords
50-
NULL, NA, TRUE, FALSE, Inf, NaN, NA_integer_,
51-
NA_real_, NA_character_, NA_complex_, function,
52-
while, repeat, for, if, in, else, next, break,
53-
..., ..1, ..2
54-
55-
# not keywords
56-
the quick brown fox jumped over the lazy dogs
57-
null na true false inf nan na_integer_ na_real_
58-
na_character_ na_complex_ Function While Repeat
59-
For If In Else Next Break .. .... "NULL" `NULL` 'NULL'
60-
61-
# operators
62-
+, -, *, /, %%, ^, >, >=, <, <=, ==, !=, !, &, |, ~,
63-
->, <-, <<-, $, :, ::
64-
65-
# infix operator
66-
foo %union% bar
67-
%"test"%
68-
`"test"`
20+
library(ggplot2)
6921

22+
models <- tibble::tribble(
23+
~model_name, ~ formula,
24+
"length-width", Sepal.Length ~ Petal.Width + Petal.Length,
25+
"interaction", Sepal.Length ~ Petal.Width * Petal.Length
26+
)
27+
28+
iris %>%
29+
nest_by(Species) %>%
30+
left_join(models, by = character()) %>%
31+
rowwise(Species, model_name) %>%
32+
mutate(model = list(lm(formula, data = data))) %>%
33+
summarise(broom::glance(model))
34+
#> `summarise()` regrouping output by 'Species', 'model_name' (override with `.groups` argument)
35+
#> # A tibble: 6 x 13
36+
#> # Groups: Species, model_name [6]
37+
#> Species model_name r.squared adj.r.squared sigma statistic p.value df
38+
#> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
39+
#> 1 setosa length-wi… 0.112 0.0739 0.339 2.96 6.18e- 2 3
40+
#> 2 setosa interacti… 0.133 0.0760 0.339 2.34 8.54e- 2 4
41+
#> 3 versic… length-wi… 0.574 0.556 0.344 31.7 1.92e- 9 3
42+
#> 4 versic… interacti… 0.577 0.549 0.347 20.9 1.11e- 8 4
43+
#> 5 virgin… length-wi… 0.747 0.736 0.327 69.3 9.50e-15 3
44+
#> 6 virgin… interacti… 0.757 0.741 0.323 47.8 3.54e-14 4
45+
#> # … with 5 more variables: logLik <dbl>, AIC <dbl>, BIC <dbl>, deviance <dbl>,
46+
#> # df.residual <int>

Diff for: test/markup/r/names.expect.txt

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<span class="hljs-comment"># Valid names</span>
2+
3+
a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, <span class="hljs-built_in">c</span>, <span class="hljs-built_in">T</span>, <span class="hljs-built_in">F</span>, ._1
4+
5+
<span class="hljs-comment"># Reserved Words</span>
6+
7+
<span class="hljs-literal">NA</span>, <span class="hljs-literal">NA_integer_</span>, <span class="hljs-literal">NA_real_</span>, <span class="hljs-literal">NA_character_</span>, <span class="hljs-literal">NA_complex_</span>, <span class="hljs-literal">NULL</span>, <span class="hljs-literal">NaN</span>, <span class="hljs-literal">Inf</span>
8+
9+
<span class="hljs-comment"># Keywords</span>
10+
11+
<span class="hljs-keyword">function</span>, <span class="hljs-keyword">while</span>, <span class="hljs-keyword">repeat</span>, <span class="hljs-keyword">for</span>, <span class="hljs-keyword">if</span>, <span class="hljs-keyword">in</span>, <span class="hljs-keyword">else</span>, <span class="hljs-keyword">next</span>, <span class="hljs-keyword">break</span>
12+
13+
<span class="hljs-comment"># Not reserved</span>
14+
15+
NULLa, NULL1, NULL., `NULL`, <span class="hljs-string">&#x27;NULL&#x27;</span>, NA_foo_, na_real_, Function, for.
16+
17+
<span class="hljs-comment"># Primitive built-ins</span>
18+
19+
<span class="hljs-built_in">return</span>, <span class="hljs-built_in">switch</span>, <span class="hljs-built_in">sum</span>
20+
21+
<span class="hljs-comment"># Non-primitive base functions</span>
22+
23+
stop, try
24+
25+
<span class="hljs-comment"># Quoted identifiers</span>
26+
27+
`+`
28+
`%*%`
29+
`a 10 b`
30+
`for`
31+
`# x`
32+
`\`b`
33+
`\\`
34+
`%\`%`
35+
36+
<span class="hljs-comment"># Invalid names (for reference)</span>
37+
38+
<span class="hljs-comment"># 0abc, .0abc, abc+cde, _, _., _x, _1, .1_</span>

0 commit comments

Comments
 (0)