Skip to content

Commit e1237f7

Browse files
author
Nick Frasser
authored
Scanner token parsing refactor (#353)
* Refactor scanner to emit more granular tokens

  The DOMAIN token (among others) has been removed in favour of WORD, UWORD and other tokens. SCHEME (formerly PROTOCOL) tokens now come in several flavours.

  Includes additional facilities for token groups, which will be useful for future plugins down the line.

  Also fixes file URL behaviour.

* Update plugins to use new scanner tokens

  Also improves hashtag and mention plugin accuracy.

* Update tests to work with new scanner
* Better workspace packages to improve build order
* Update benchmark require
* Remove resolved FIXME
* Additional tests for linkify register functions

Fixes #171
Fixes #245
Fixes #351
1 parent 3d4637e commit e1237f7

18 files changed

+714
-535
lines changed

package.json

+2
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
"node": ">=8"
6262
},
6363
"workspaces": [
64+
"./packages/linkifyjs",
65+
"./packages/linkify-plugin-*/",
6466
"./packages/*"
6567
]
6668
}

packages/linkifyjs/src/core/fsm.js

+31-7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* @param {string|class} token to emit
1111
*/
1212
export function State(token) {
13+
// this.n = null; // DEBUG: State name
1314
this.j = {}; // IMPLEMENTATION 1
1415
// this.j = []; // IMPLEMENTATION 2
1516
this.jr = [];
@@ -49,11 +50,21 @@ State.prototype = {
4950
* transitioned to on the given input regardless of what that input
5051
* previously did.
5152
*
52-
* @param {string} input character or token to transition on
53+
* @param {string} input character or token type to transition on
5354
* @param {Token|State} tokenOrState transition to a matching state
5455
* @returns State taken after the given input
5556
*/
5657
tt(input, tokenOrState) {
58+
if (input instanceof Array) {
59+
// Recursive case
60+
if (input.length === 0) { return; }
61+
const nextState = this.tt(input[0], tokenOrState);
62+
for (let i = 1; i < input.length; i++) {
63+
this.tt(input[i], nextState);
64+
}
65+
return nextState;
66+
}
67+
5768
if (tokenOrState && tokenOrState.j) {
5869
// State, default a basic transition
5970
this.j[input] = tokenOrState;
@@ -92,13 +103,21 @@ State.prototype = {
92103
* Utility function to create state without using new keyword (reduced file size
93104
* when minified)
94105
*/
95-
export const makeState = () => new State();
106+
export const makeState = (/*name*/) => {
107+
const s = new State();
108+
// if (name) { s.n = name; } // DEBUG
109+
return s;
110+
};
96111

97112
/**
98113
* Similar to previous except it is an accepting state that emits a token
99114
* @param {Token} token
100115
*/
101-
export const makeAcceptingState = (token) => new State(token);
116+
export const makeAcceptingState = (token/*, name*/) => {
117+
const s = new State(token);
118+
// if (name) { s.n = name; } // DEBUG
119+
return s;
120+
};
102121

103122
/**
104123
* Create a transition from startState to nextState via the given character
@@ -112,6 +131,7 @@ export const makeT = (startState, input, nextState) => {
112131

113132
// IMPLEMENTATION 2: Add to array (slower)
114133
// startState.j.push([input, nextState]);
134+
return startState.j[input];
115135
};
116136

117137
/**
@@ -127,7 +147,7 @@ export const makeRegexT = (startState, regex, nextState) => {
127147
/**
128148
* Follow the transition from the given character to the next state
129149
* @param {State} state
130-
* @param {Token} input character or other concrete token type to transition
150+
* @param {string|Token} input character or other concrete token type to transition on
131151
* @returns {?State} the next state, if any
132152
*/
133153
export const takeT = (state, input) => {
@@ -145,8 +165,8 @@ export const takeT = (state, input) => {
145165

146166
for (let i = 0; i < state.jr.length; i++) {
147167
const regex = state.jr[i][0];
148-
const nextState = state.jr[i][1];
149-
if (regex.test(input)) {return nextState;}
168+
const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
169+
if (nextState && regex.test(input)) { return nextState; }
150170
}
151171
// Nowhere left to jump! Return default, if any
152172
return state.jd;
@@ -176,6 +196,7 @@ export const makeBatchT = (startState, transitions) => {
176196
for (let i = 0; i < transitions.length; i++) {
177197
const input = transitions[i][0];
178198
const nextState = transitions[i][1];
199+
// if (!nextState.n && typeof input === 'string') { nextState.n = input; } // DEBUG
179200
makeT(startState, input, nextState);
180201
}
181202
};
@@ -193,6 +214,7 @@ export const makeBatchT = (startState, transitions) => {
193214
* @param {string} str
194215
* @param {State} endState state to transition to on the final character of str
195216
* @param {Token} defaultStateFactory
217+
* @return {State} the final state
196218
*/
197219
export const makeChainT = (state, str, endState, defaultStateFactory) => {
198220
let i = 0, len = str.length, nextState;
@@ -203,7 +225,7 @@ export const makeChainT = (state, str, endState, defaultStateFactory) => {
203225
i++;
204226
}
205227

206-
if (i >= len) { return []; } // no new tokens were added
228+
if (i >= len) { return state; } // no new tokens were added
207229

208230
while (i < len - 1) {
209231
nextState = defaultStateFactory();
@@ -213,4 +235,6 @@ export const makeChainT = (state, str, endState, defaultStateFactory) => {
213235
}
214236

215237
makeT(state, str[len - 1], endState);
238+
// if (!endState.n) { endState.n = str; } // DEBUG
239+
return endState;
216240
};

0 commit comments

Comments
 (0)