@@ -262,6 +262,14 @@ static String toHex16Bit(unsigned int x) {
262
262
return result;
263
263
}
264
264
265
+ static void appendRaw (String& result, unsigned ch) {
266
+ result += static_cast <char >(ch);
267
+ }
268
+
269
+ static void appendHex (String& result, unsigned ch) {
270
+ result.append (" \\ u" ).append (toHex16Bit (ch));
271
+ }
272
+
265
273
static String valueToQuotedStringN (const char * value, unsigned length,
266
274
bool emitUTF8 = false ) {
267
275
if (value == nullptr )
@@ -309,31 +317,25 @@ static String valueToQuotedStringN(const char* value, unsigned length,
309
317
// Should add a flag to allow this compatibility mode and prevent this
310
318
// sequence from occurring.
311
319
default : {
320
+ unsigned codepoint;
312
321
if (emitUTF8) {
313
- result += *c ;
322
+ codepoint = static_cast < unsigned char >(*c) ;
314
323
} else {
315
- unsigned int codepoint = utf8ToCodepoint (c, end);
316
- const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20 ;
317
- const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F ;
318
- const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000 ;
319
- // don't escape non-control characters
320
- // (short escape sequence are applied above)
321
- if (FIRST_NON_CONTROL_CODEPOINT <= codepoint &&
322
- codepoint <= LAST_NON_CONTROL_CODEPOINT) {
323
- result += static_cast <char >(codepoint);
324
- } else if (codepoint <
325
- FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic
326
- // Multilingual Plane
327
- result += " \\ u" ;
328
- result += toHex16Bit (codepoint);
329
- } else { // codepoint is not in Basic Multilingual Plane
330
- // convert to surrogate pair first
331
- codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT;
332
- result += " \\ u" ;
333
- result += toHex16Bit ((codepoint >> 10 ) + 0xD800 );
334
- result += " \\ u" ;
335
- result += toHex16Bit ((codepoint & 0x3FF ) + 0xDC00 );
336
- }
324
+ codepoint = utf8ToCodepoint (c, end); // modifies `c`
325
+ }
326
+
327
+ if (codepoint < 0x20 ) {
328
+ appendHex (result, codepoint);
329
+ } else if (codepoint < 0x80 || emitUTF8) {
330
+ appendRaw (result, codepoint);
331
+ } else if (codepoint < 0x10000 ) {
332
+ // Basic Multilingual Plane
333
+ appendHex (result, codepoint);
334
+ } else {
335
+ // Extended Unicode. Encode 20 bits as a surrogate pair.
336
+ codepoint -= 0x10000 ;
337
+ appendHex (result, 0xd800 + ((codepoint >> 10 ) & 0x3ff ));
338
+ appendHex (result, 0xdc00 + (codepoint & 0x3ff ));
337
339
}
338
340
} break ;
339
341
}
@@ -864,7 +866,8 @@ struct CommentStyle {
864
866
// / Decide whether to write comments.
865
867
enum Enum {
  None = 0, ///< Drop all comments.
  Most = 1, ///< Recover odd behavior of previous versions (not implemented
            ///< yet).
  All = 2   ///< Keep all comments.
};
870
873
};
0 commit comments