Skip to content

Commit 4312b35

Browse files
author
Nathan Sobo
committed
Include common scanner.h in both custom scanners to avoid extra symbols
1 parent d5bf4ea commit 4312b35

File tree

6 files changed

+145
-143
lines changed

6 files changed

+145
-143
lines changed
File renamed without changes.

common/scanner.h

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#include <tree_sitter/parser.h>
2+
#include <wctype.h>
3+
4+
// Kinds of external token this scanner can produce. The enumerators are used
// both as indices into the `valid_symbols` array and as the value stored in
// `lexer->result_symbol`.
// NOTE(review): the numeric order presumably has to match the order of the
// `externals` list in the grammar definition — confirm against define-grammar.
enum TokenType {
  AUTOMATIC_SEMICOLON = 0,
  TEMPLATE_CHARS = 1,
};
8+
9+
// Moves the lexer forward by one character through its `advance` callback.
// The second argument is always `false`; in tree-sitter's lexer API this is
// the `skip` flag, so the character is consumed without being flagged as
// skipped whitespace.
static void advance(TSLexer *lexer) {
  const bool skip = false;
  lexer->advance(lexer, skip);
}
10+
11+
// Consumes any run of whitespace, `//` line comments and `/* ... */` block
// comments starting at the current lookahead.
//
// Returns true once the lookahead is something other than whitespace or a
// `/`. Returns false when a `/` is found that does not open a comment; in
// that case the `/` itself has already been consumed.
//
// A line comment stops at (but does not consume) the newline or end of
// input. An unterminated block comment simply runs to end of input, after
// which the function returns true.
static bool scan_whitespace_and_comments(TSLexer *lexer) {
  while (true) {
    while (iswspace(lexer->lookahead)) {
      advance(lexer);
    }

    // Anything other than a slash ends the whitespace/comment run.
    if (lexer->lookahead != '/') {
      return true;
    }
    advance(lexer);

    switch (lexer->lookahead) {
      case '/':
        // Line comment: skip to the end of the line.
        advance(lexer);
        while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
          advance(lexer);
        }
        break;

      case '*': {
        // Block comment: skip until the closing `*/` or end of input.
        advance(lexer);
        bool closed = false;
        while (!closed && lexer->lookahead != 0) {
          const bool was_star = lexer->lookahead == '*';
          advance(lexer);
          if (was_star && lexer->lookahead == '/') {
            advance(lexer);
            closed = true;
          }
        }
        break;
      }

      default:
        // A lone `/` (not a comment opener).
        return false;
    }
  }
}
46+
47+
// Returns true when `c` may continue an identifier that started with ASCII
// letters: a letter, a digit, `_` or `$`.
static bool is_identifier_part(int32_t c) {
  return iswalnum(c) || c == '_' || c == '$';
}

// Scans a run of literal template-string characters.
//
// Sets `result_symbol` to TEMPLATE_CHARS and extends the token up to, but not
// including, the closing backtick or a `${` substitution opener. A backslash
// consumes itself and the following character. Returns false when the run
// would be empty or when end of input is reached first.
static bool scan_template_chars(TSLexer *lexer) {
  lexer->result_symbol = TEMPLATE_CHARS;

  for (bool has_content = false;; has_content = true) {
    // Everything consumed so far belongs to the token; a `$` consumed below
    // is only included if the next iteration gets to mark the end again.
    lexer->mark_end(lexer);

    switch (lexer->lookahead) {
      case '`':
        return has_content;
      case '\0':
        return false;
      case '$':
        advance(lexer);
        if (lexer->lookahead == '{') return has_content;
        break;
      case '\\':
        advance(lexer);
        advance(lexer);
        break;
      default:
        advance(lexer);
    }
  }
}

// Decides whether an automatic semicolon should be produced at the current
// position.
//
// Sets `result_symbol` to AUTOMATIC_SEMICOLON and marks the token end before
// consuming anything, so every later advance is lookahead only. Returns true
// at end of input, before `}`, or after a newline when the next significant
// character cannot continue the previous statement.
static bool scan_automatic_semicolon(TSLexer *lexer) {
  lexer->result_symbol = AUTOMATIC_SEMICOLON;
  lexer->mark_end(lexer);

  // Look for a newline; only whitespace may precede it.
  for (;;) {
    if (lexer->lookahead == 0) return true;
    if (lexer->lookahead == '}') return true;
    if (!iswspace(lexer->lookahead)) return false;
    if (lexer->lookahead == '\n') break;
    advance(lexer);
  }

  advance(lexer);

  if (!scan_whitespace_and_comments(lexer)) return false;

  switch (lexer->lookahead) {
    // These characters continue the expression on the previous line.
    case ',':
    case '.':
    case ';':
    case '*':
    case '%':
    case '>':
    case '<':
    case '=':
    case '[':
    case '(':
    case '?':
    case '^':
    case '|':
    case '&':
    case '/':
      return false;

    // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
    case '+':
      advance(lexer);
      return lexer->lookahead == '+';
    case '-':
      advance(lexer);
      return lexer->lookahead == '-';

    // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
    case '!':
      advance(lexer);
      return lexer->lookahead != '=';

    // Don't insert a semicolon before `in` or `instanceof`, but do insert one
    // before an identifier.
    case 'i':
      advance(lexer);

      if (lexer->lookahead != 'n') return true;
      advance(lexer);

      // The word is exactly `in` only if no identifier character follows.
      // Digits, `_` and `$` count too, so names like `in_range` or `in2` are
      // treated as identifiers rather than as the `in` operator.
      if (!is_identifier_part(lexer->lookahead)) return false;

      for (unsigned i = 0; i < 8; i++) {
        if (lexer->lookahead != "stanceof"[i]) return true;
        advance(lexer);
      }

      // Same check for `instanceof`: a longer word is just an identifier.
      if (!is_identifier_part(lexer->lookahead)) return false;
      break;

    default:
      break;
  }

  return true;
}

// Shared entry point for the external scanners that include this header.
//
// payload:       scanner state; unused here.
// lexer:         tree-sitter lexer to read from and report the token on.
// valid_symbols: flags indexed by TokenType for the tokens currently accepted.
//
// Returns true when a token was recognised, with `lexer->result_symbol` set
// to its kind. When both token kinds are flagged valid at once the scanner
// declines to produce anything.
// NOTE(review): both being valid is presumably tree-sitter's error-recovery
// state — confirm against the external scanner documentation.
static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
  (void)payload;

  if (valid_symbols[TEMPLATE_CHARS]) {
    if (valid_symbols[AUTOMATIC_SEMICOLON]) return false;
    return scan_template_chars(lexer);
  }

  return scan_automatic_semicolon(lexer);
}

tsx/grammar.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
const defineGrammar = require('../define-grammar');
1+
const defineGrammar = require('../common/define-grammar');
22

33
module.exports = defineGrammar('tsx');

tsx/src/scanner.c

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
#include <tree_sitter/parser.h>
2-
#include <wctype.h>
3-
#include "../../typescript/src/scanner.c"
1+
#include "../../common/scanner.h"
42

53
void *tree_sitter_tsx_external_scanner_create() { return NULL; }
64
void tree_sitter_tsx_external_scanner_destroy(void *p) {}
@@ -9,5 +7,5 @@ unsigned tree_sitter_tsx_external_scanner_serialize(void *p, char *buffer) { ret
97
void tree_sitter_tsx_external_scanner_deserialize(void *p, const char *b, unsigned n) {}
108

119
bool tree_sitter_tsx_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
12-
return tree_sitter_typescript_external_scanner_scan(payload, lexer, valid_symbols);
10+
return external_scanner_scan(payload, lexer, valid_symbols);
1311
}

typescript/grammar.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
const defineGrammar = require('../define-grammar');
1+
const defineGrammar = require('../common/define-grammar');
22

33
module.exports = defineGrammar('typescript');

typescript/src/scanner.c

+3-137
Original file line numberDiff line numberDiff line change
@@ -1,145 +1,11 @@
1-
#include <tree_sitter/parser.h>
2-
#include <wctype.h>
3-
4-
enum TokenType {
5-
AUTOMATIC_SEMICOLON,
6-
TEMPLATE_CHARS
7-
};
1+
#include "../../common/scanner.h"
82

93
void *tree_sitter_typescript_external_scanner_create() { return NULL; }
104
void tree_sitter_typescript_external_scanner_destroy(void *p) {}
115
void tree_sitter_typescript_external_scanner_reset(void *p) {}
126
unsigned tree_sitter_typescript_external_scanner_serialize(void *p, char *buffer) { return 0; }
137
void tree_sitter_typescript_external_scanner_deserialize(void *p, const char *b, unsigned n) {}
148

15-
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
16-
17-
static bool scan_whitespace_and_comments(TSLexer *lexer) {
18-
for (;;) {
19-
while (iswspace(lexer->lookahead)) {
20-
advance(lexer);
21-
}
22-
23-
if (lexer->lookahead == '/') {
24-
advance(lexer);
25-
26-
if (lexer->lookahead == '/') {
27-
advance(lexer);
28-
while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
29-
advance(lexer);
30-
}
31-
} else if (lexer->lookahead == '*') {
32-
advance(lexer);
33-
while (lexer->lookahead != 0) {
34-
if (lexer->lookahead == '*') {
35-
advance(lexer);
36-
if (lexer->lookahead == '/') {
37-
advance(lexer);
38-
break;
39-
}
40-
} else {
41-
advance(lexer);
42-
}
43-
}
44-
} else {
45-
return false;
46-
}
47-
} else {
48-
return true;
49-
}
50-
}
51-
}
52-
53-
bool tree_sitter_typescript_external_scanner_scan(void *payload, TSLexer *lexer,
54-
const bool *valid_symbols) {
55-
if (valid_symbols[TEMPLATE_CHARS]) {
56-
if (valid_symbols[AUTOMATIC_SEMICOLON]) return false;
57-
lexer->result_symbol = TEMPLATE_CHARS;
58-
for (bool notfirst = false;; notfirst = true) {
59-
lexer->mark_end(lexer);
60-
switch (lexer->lookahead) {
61-
case '`':
62-
return notfirst;
63-
case '\0':
64-
return false;
65-
case '$':
66-
advance(lexer);
67-
if (lexer->lookahead == '{') return notfirst;
68-
break;
69-
case '\\':
70-
advance(lexer);
71-
advance(lexer);
72-
break;
73-
default:
74-
advance(lexer);
75-
}
76-
}
77-
} else {
78-
lexer->result_symbol = AUTOMATIC_SEMICOLON;
79-
lexer->mark_end(lexer);
80-
81-
for (;;) {
82-
if (lexer->lookahead == 0) return true;
83-
if (lexer->lookahead == '}') return true;
84-
if (!iswspace(lexer->lookahead)) return false;
85-
if (lexer->lookahead == '\n') break;
86-
advance(lexer);
87-
}
88-
89-
advance(lexer);
90-
91-
if (!scan_whitespace_and_comments(lexer)) return false;
92-
93-
switch (lexer->lookahead) {
94-
case ',':
95-
case '.':
96-
case ';':
97-
case '*':
98-
case '%':
99-
case '>':
100-
case '<':
101-
case '=':
102-
case '[':
103-
case '(':
104-
case '?':
105-
case '^':
106-
case '|':
107-
case '&':
108-
case '/':
109-
return false;
110-
111-
// Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
112-
case '+':
113-
advance(lexer);
114-
return lexer->lookahead == '+';
115-
case '-':
116-
advance(lexer);
117-
return lexer->lookahead == '-';
118-
119-
// Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
120-
case '!':
121-
advance(lexer);
122-
return lexer->lookahead != '=';
123-
124-
// Don't insert a semicolon before `in` or `instanceof`, but do insert one
125-
// before an identifier.
126-
case 'i':
127-
advance(lexer);
128-
129-
if (lexer->lookahead != 'n') return true;
130-
advance(lexer);
131-
132-
if (!iswalpha(lexer->lookahead)) return false;
133-
134-
for (unsigned i = 0; i < 8; i++) {
135-
if (lexer->lookahead != "stanceof"[i]) return true;
136-
advance(lexer);
137-
}
138-
139-
if (!iswalpha(lexer->lookahead)) return false;
140-
break;
141-
}
142-
143-
return true;
144-
}
9+
bool tree_sitter_typescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
10+
return external_scanner_scan(payload, lexer, valid_symbols);
14511
}

0 commit comments

Comments
 (0)