|
| 1 | +const { FAILURE } = require('./constants') |
| 2 | +const Errors = require('./errors') |
| 3 | + |
| 4 | +/** |
| 5 | + * @typedef {'scheme-start-state'|'no-scheme-state'|'scheme-state'} State |
| 6 | + */ |
| 7 | + |
/**
 * Character-by-character state machine that walks a URL string and fills in
 * a URL record. The inline comments quote the step wording of the WHATWG URL
 * "basic URL parser"; only the scheme start state is implemented so far.
 *
 * Construction runs the machine immediately: once `new URLStateMachine(...)`
 * returns, `url`, `failure` and `hasValidationError` reflect the outcome.
 *
 * @property {string} input - Sanitized input (see {@link URLStateMachine#setInput}).
 * @property {State} state - State the machine is currently in.
 * @property {?string} base - Base URL as passed in, or `null` when omitted.
 * @property {string} buffer - Scratch buffer the states append to.
 * @property {number} pointer - Index of the code point being processed.
 * @property {boolean} failure - Set when a state handler returned `FAILURE`.
 */
class URLStateMachine {

  // A validation error does not mean that the parser terminates.
  // Termination of a parser is always stated explicitly, e.g., through a return statement.
  hasValidationError = false

  /**
   * @constructor
   * @param {string} input - Non-empty string to parse.
   * @param {string=} base - Optional base URL; stored as `null` when falsy.
   * @param {State=} stateOverride - Optional state to start from instead of
   *   `'scheme-start-state'`.
   * @throws Throws `Errors.INVALID_ARGUMENT` when `input` is empty or not a string.
   */
  constructor(input, base, stateOverride) {
    if (!input || typeof input !== 'string') {
      throw Errors.INVALID_ARGUMENT
    }

    this.setInput(input)
    this.setState(stateOverride || 'scheme-start-state')
    this.buffer = ''
    this.base = base || null
    this.url = {
      scheme: '',
      username: '',
      password: '',
      host: null,
      port: null,
      path: [],
      query: null,
      fragment: null,
    }
    this.stateOverride = stateOverride
    this.atSignSeen = false
    this.insideBrackets = false
    this.passwordTokenSeen = false
    this.pointer = 0
    this.failure = false

    // Parsing happens eagerly as part of construction.
    this.iterateInput()
  }

  /**
   * @private
   * @function setInput
   * @description Sanitize the input and store it on `this.input`. Sets
   *   `hasValidationError` when anything had to be removed; never aborts.
   * @param {string} input
   */
  setInput(input) {
    // If input contains any leading or trailing C0 control or space, validation error.
    // "C0 control or space" is U+0000..U+0020 inclusive; only the two ends of
    // the string are trimmed, interior characters are left for later states.
    const edgesTrimmed = input.replace(/^[\u0000-\u0020]+|[\u0000-\u0020]+$/g, '')

    if (edgesTrimmed !== input) {
      this.hasValidationError = true
    }

    // If input contains any ASCII tab or newline, validation error.
    // Unlike the trim above, these are removed wherever they occur.
    const tabNewlineRemoved = edgesTrimmed.replace(/[\u0009\u000A\u000D]/g, '')

    if (tabNewlineRemoved !== edgesTrimmed) {
      this.hasValidationError = true
    }

    this.input = tabNewlineRemoved
  }

  /**
   * @private
   * @function setState
   * @description Changes the current state of the state machine
   * @param {State} state
   */
  setState(state) {
    this.state = state
  }

  /**
   * @private
   * @function iterateInput
   * @description Feed the input to the current state handler one code point
   *   at a time, advancing `pointer` after each step. Stops at the end of the
   *   input, when a handler returns `false`, or when a handler returns
   *   `FAILURE` (in which case `failure` is set).
   */
  iterateInput() {
    // Array.from iterates by code point, so astral characters stay intact.
    const codes = Array.from(this.input, c => [c, c.codePointAt(0)])

    while (this.pointer < codes.length) {
      const [character, code] = codes[this.pointer]
      const result = this.processState(code, character)

      if (result === FAILURE) {
        this.failure = true
        break
      } else if (result === false) {
        break
      }

      // Handlers may have moved the pointer back to have a code point
      // re-processed in a new state; the increment still applies afterwards.
      this.pointer++
    }
  }

  /**
   * @function processState
   * @description Dispatch one code point to the handler of the current state.
   * @param {number} code - Code point value.
   * @param {String} character - The same code point as a string.
   * @returns {boolean|Symbol|undefined} The handler's result; `undefined`
   *   when the current state has no handler yet, which the iteration loop
   *   treats as "keep going".
   */
  processState(code, character) {
    switch (this.state) {
      case 'scheme-start-state':
        return this.schemeStartState(code, character)
      case 'no-scheme-state':
        return this.noSchemeState(code, character)
    }
  }

  /**
   * @private
   * @function schemeStartState
   * @param {number} code
   * @param {string} character
   * @returns {boolean|Symbol} `true` to continue, `FAILURE` when a state
   *   override was given and the code point is not an ASCII alpha.
   */
  schemeStartState(code, character) {
    // If c is an ASCII alpha, append c, lowercased, to buffer, and set state to scheme state.
    if ((code >= 0x41 && code <= 0x5A) || (code >= 0x61 && code <= 0x7A)) {
      this.buffer += character.toLowerCase()
      this.setState('scheme-state')
    } else if (!this.stateOverride) {
      // Otherwise, if state override is not given, set state to no scheme state and decrease pointer by 1.
      this.setState('no-scheme-state')
      this.pointer = this.pointer - 1
    } else {
      // Otherwise, validation error, return failure.
      this.hasValidationError = true
      return FAILURE
    }

    return true
  }

  /**
   * @private
   * @function noSchemeState
   * @description Not implemented yet: currently a no-op that returns
   *   `undefined`, so the iteration loop simply advances past the code point.
   * @param {number} code
   * @param {string} character
   */
  noSchemeState(code, character) {

  }
}
0 commit comments