@@ -48,14 +48,9 @@ public Token GetToken()
48
48
49
49
if ( code == '"' )
50
50
{
51
- if ( _currentIndex + 2 < _source . Length && _source . Span [ _currentIndex + 1 ] == '"' && _source . Span [ _currentIndex + 2 ] == '"' )
52
- {
53
- return ReadBlockString ( ) ;
54
- }
55
- else
56
- {
57
- return ReadString ( ) ;
58
- }
51
+ return _currentIndex + 2 < _source . Length && _source . Span [ _currentIndex + 1 ] == '"' && _source . Span [ _currentIndex + 2 ] == '"'
52
+ ? ReadBlockString ( )
53
+ : ReadString ( ) ;
59
54
}
60
55
61
56
return Throw_From_GetToken2 ( code ) ;
@@ -133,7 +128,11 @@ private Token ReadComment()
133
128
int start = _currentIndex ;
134
129
char code = NextCode ( ) ;
135
130
136
- Span < char > buffer = stackalloc char [ 4096 ] ;
131
+ // The buffer on the stack avoids intermediate heap allocations if the string
132
+ // 1) is not too long
133
+ // or
134
+ // 2) does not contain escape sequences.
135
+ Span < char > buffer = stackalloc char [ Math . Min ( _source . Length - _currentIndex + 32 , 4096 ) ] ;
137
136
StringBuilder ? sb = null ;
138
137
139
138
int index = 0 ;
@@ -149,8 +148,7 @@ private Token ReadComment()
149
148
}
150
149
catch ( IndexOutOfRangeException ) // fallback to StringBuilder in case of buffer overflow
151
150
{
152
- if ( sb == null )
153
- sb = new StringBuilder ( buffer . Length * 2 ) ;
151
+ sb ??= new StringBuilder ( buffer . Length * 2 ) ;
154
152
155
153
for ( int i = 0 ; i < buffer . Length ; ++ i )
156
154
sb . Append ( buffer [ i ] ) ;
@@ -181,16 +179,25 @@ private Token ReadComment()
181
179
) ;
182
180
}
183
181
182
+ // TODO: this method can still be optimized to not allocate at all if the block string:
183
+ //
184
+ // 1) is not too long
185
+ // 2) has no escape sequences
186
+ // 3) has no '\r' characters
187
+ // 4) has no initial whitespace on each line, ignoring the first line (or, has no '\n' characters)
188
+ //
189
+ // In this case, the ROM for the returned token represents an unmodified part of the source ROM,
190
+ // so it can simply be sliced from '_source', as is done in the simpler ReadString method.
184
191
private Token ReadBlockString ( )
185
192
{
186
- int start = _currentIndex += 2 ;
193
+ int start = _currentIndex += 2 ; // skip ""
187
194
char code = NextCode ( ) ;
188
195
189
- Span < char > buffer = stackalloc char [ 4096 ] ;
196
+ Span < char > buffer = stackalloc char [ Math . Min ( _source . Length - _currentIndex + 32 , 4096 ) ] ;
190
197
StringBuilder ? sb = null ;
191
198
192
199
int index = 0 ;
193
- bool escape = false ; //when the last character was \
200
+ bool escape = false ; // when the last character was \
194
201
bool lastWasCr = false ;
195
202
196
203
while ( _currentIndex < _source . Length )
@@ -200,30 +207,30 @@ private Token ReadBlockString()
200
207
Throw_From_ReadBlockString1 ( code ) ;
201
208
}
202
209
203
- //check for """
210
+ // check for """
204
211
if ( code == '"' && _currentIndex + 2 < _source . Length && _source . Span [ _currentIndex + 1 ] == '"' && _source . Span [ _currentIndex + 2 ] == '"' )
205
212
{
206
- //if last character was \ then go ahead and write out the """, skipping the \
213
+ // if last character was \ then go ahead and write out the """, skipping the \
207
214
if ( escape )
208
215
{
209
216
escape = false ;
210
217
}
211
218
else
212
219
{
213
- //end of blockstring
220
+ // end of block string
214
221
break ;
215
222
}
216
223
}
217
224
else if ( escape )
218
225
{
219
- //last character was \ so write the \ and then retry this character with escaped = false
226
+ // last character was \ so write the \ and then retry this character with escaped = false
220
227
code = '\\ ' ;
221
228
_currentIndex -- ;
222
229
escape = false ;
223
230
}
224
231
else if ( code == '\\ ' )
225
232
{
226
- //this character is a \ so don't write anything yet, but check the next character
233
+ // this character is a \ so don't write anything yet, but check the next character
227
234
escape = true ;
228
235
code = NextCode ( ) ;
229
236
lastWasCr = false ;
@@ -237,15 +244,14 @@ private Token ReadBlockString()
237
244
238
245
if ( ! ( lastWasCr && code == '\n ' ) )
239
246
{
240
- //write code
247
+ // write code
241
248
if ( index < buffer . Length )
242
249
{
243
250
buffer [ index ++ ] = code == '\r ' ? '\n ' : code ;
244
251
}
245
252
else // fallback to StringBuilder in case of buffer overflow
246
253
{
247
- if ( sb == null )
248
- sb = new StringBuilder ( buffer . Length * 2 ) ;
254
+ sb ??= new StringBuilder ( buffer . Length * 2 ) ;
249
255
250
256
for ( int i = 0 ; i < buffer . Length ; ++ i )
251
257
sb . Append ( buffer [ i ] ) ;
@@ -262,18 +268,18 @@ private Token ReadBlockString()
262
268
263
269
if ( _currentIndex >= _source . Length )
264
270
{
265
- Throw_From_ReadString2 ( ) ;
271
+ Throw_From_ReadBlockString2 ( ) ;
266
272
}
267
- _currentIndex += 2 ;
273
+ _currentIndex += 2 ; // skip ""
268
274
269
275
if ( sb != null )
270
276
{
271
277
for ( int i = 0 ; i < index ; ++ i )
272
278
sb . Append ( buffer [ i ] ) ;
273
279
}
274
280
275
- //at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string
276
- //also, all line termination combinations have been replaced with LF
281
+ // at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string
282
+ // also, all line termination combinations have been replaced with LF
277
283
278
284
ROM value ;
279
285
if ( sb != null )
@@ -297,11 +303,11 @@ private Token ReadBlockString()
297
303
298
304
static ROM ProcessBuffer ( Span < char > buffer )
299
305
{
300
- //scan string to determine maximum valid commonIndent value,
301
- //number of initial blank lines, and number of trailing blank lines
306
+ // scan string to determine maximum valid commonIndent value,
307
+ // number of initial blank lines, and number of trailing blank lines
302
308
int commonIndent = int . MaxValue ;
303
309
int initialBlankLines = 1 ;
304
- int skipLinesAfter ; //skip all text after line ###, as determined by the number of trailing blank lines
310
+ int skipLinesAfter ; // skip all text after line ###, as determined by the number of trailing blank lines
305
311
{
306
312
int trailingBlankLines = 0 ;
307
313
int line = 0 ;
@@ -347,8 +353,8 @@ static ROM ProcessBuffer(Span<char> buffer)
347
353
skipLinesAfter = lines - trailingBlankLines ;
348
354
}
349
355
350
- //step through the input, skipping the initial blank lines and the trailing blank lines,
351
- //and skipping the initial blank characters from the start of each line
356
+ // step through the input, skipping the initial blank lines and the trailing blank lines,
357
+ // and skipping the initial blank characters from the start of each line
352
358
Span < char > output = buffer . Length <= 4096 ? stackalloc char [ buffer . Length ] : new char [ buffer . Length ] ;
353
359
int outputIndex = 0 ;
354
360
{
@@ -373,7 +379,7 @@ static ROM ProcessBuffer(Span<char> buffer)
373
379
}
374
380
}
375
381
376
- //return the string value from the output buffer
382
+ // return the string value from the output buffer
377
383
return output . Slice ( 0 , outputIndex ) . ToString ( ) ;
378
384
}
379
385
}
@@ -383,7 +389,7 @@ private Token ReadString()
383
389
int start = _currentIndex ;
384
390
char code = NextCode ( ) ;
385
391
386
- Span < char > buffer = stackalloc char [ 4096 ] ;
392
+ Span < char > buffer = stackalloc char [ Math . Min ( _source . Length - _currentIndex + 32 , 4096 ) ] ;
387
393
StringBuilder ? sb = null ;
388
394
389
395
int index = 0 ;
@@ -404,8 +410,7 @@ private Token ReadString()
404
410
}
405
411
catch ( IndexOutOfRangeException ) // fallback to StringBuilder in case of buffer overflow
406
412
{
407
- if ( sb == null )
408
- sb = new StringBuilder ( buffer . Length * 2 ) ;
413
+ sb ??= new StringBuilder ( buffer . Length * 2 ) ;
409
414
410
415
for ( int i = 0 ; i < buffer . Length ; ++ i )
411
416
sb . Append ( buffer [ i ] ) ;
@@ -453,7 +458,12 @@ private void Throw_From_ReadString2()
453
458
454
459
private void Throw_From_ReadBlockString1 ( char code )
455
460
{
456
- throw new GraphQLSyntaxErrorException ( $ "Invalid character within BlockString: \\ u{ ( int ) code : D4} .", _source , _currentIndex ) ;
461
+ throw new GraphQLSyntaxErrorException ( $ "Invalid character within block string: \\ u{ ( int ) code : D4} .", _source , _currentIndex ) ;
462
+ }
463
+
464
+ private void Throw_From_ReadBlockString2 ( )
465
+ {
466
+ throw new GraphQLSyntaxErrorException ( "Unterminated block string." , _source , _currentIndex ) ;
457
467
}
458
468
459
469
// sets escaped only to true
0 commit comments