@@ -264,7 +264,8 @@ static String toHex16Bit(unsigned int x) {
264
264
return result;
265
265
}
266
266
267
- static String valueToQuotedStringN (const char * value, unsigned length) {
267
+ static String valueToQuotedStringN (const char * value, unsigned length,
268
+ bool emitUTF8 = false ) {
268
269
if (value == nullptr )
269
270
return " " ;
270
271
@@ -310,21 +311,31 @@ static String valueToQuotedStringN(const char* value, unsigned length) {
310
311
// Should add a flag to allow this compatibility mode and prevent this
311
312
// sequence from occurring.
312
313
default : {
313
- unsigned int cp = utf8ToCodepoint (c, end);
314
- // don't escape non-control characters
315
- // (short escape sequence are applied above)
316
- if (cp < 0x80 && cp >= 0x20 )
317
- result += static_cast <char >(cp);
318
- else if (cp < 0x10000 ) { // codepoint is in Basic Multilingual Plane
319
- result += " \\ u" ;
320
- result += toHex16Bit (cp);
321
- } else { // codepoint is not in Basic Multilingual Plane
322
- // convert to surrogate pair first
323
- cp -= 0x10000 ;
324
- result += " \\ u" ;
325
- result += toHex16Bit ((cp >> 10 ) + 0xD800 );
326
- result += " \\ u" ;
327
- result += toHex16Bit ((cp & 0x3FF ) + 0xDC00 );
314
+ if (emitUTF8) {
315
+ result += *c;
316
+ } else {
317
+ unsigned int codepoint = utf8ToCodepoint (c, end);
318
+ const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20 ;
319
+ const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F ;
320
+ const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000 ;
321
+ // don't escape non-control characters
322
+ // (short escape sequence are applied above)
323
+ if (FIRST_NON_CONTROL_CODEPOINT <= codepoint &&
324
+ codepoint <= LAST_NON_CONTROL_CODEPOINT) {
325
+ result += static_cast <char >(codepoint);
326
+ } else if (codepoint <
327
+ FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic
328
+ // Multilingual Plane
329
+ result += " \\ u" ;
330
+ result += toHex16Bit (codepoint);
331
+ } else { // codepoint is not in Basic Multilingual Plane
332
+ // convert to surrogate pair first
333
+ codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT;
334
+ result += " \\ u" ;
335
+ result += toHex16Bit ((codepoint >> 10 ) + 0xD800 );
336
+ result += " \\ u" ;
337
+ result += toHex16Bit ((codepoint & 0x3FF ) + 0xDC00 );
338
+ }
328
339
}
329
340
} break ;
330
341
}
@@ -864,7 +875,8 @@ struct BuiltStyledStreamWriter : public StreamWriter {
864
875
BuiltStyledStreamWriter (String indentation, CommentStyle::Enum cs,
865
876
String colonSymbol, String nullSymbol,
866
877
String endingLineFeedSymbol, bool useSpecialFloats,
867
- unsigned int precision, PrecisionType precisionType);
878
+ bool emitUTF8, unsigned int precision,
879
+ PrecisionType precisionType);
868
880
int write (Value const & root, OStream* sout) override ;
869
881
870
882
private:
@@ -893,19 +905,20 @@ struct BuiltStyledStreamWriter : public StreamWriter {
893
905
bool addChildValues_ : 1 ;
894
906
bool indented_ : 1 ;
895
907
bool useSpecialFloats_ : 1 ;
908
+ bool emitUTF8_ : 1 ;
896
909
unsigned int precision_;
897
910
PrecisionType precisionType_;
898
911
};
899
912
// Constructor definition for BuiltStyledStreamWriter, shown here as a diff:
// lines starting with '-' are removed and lines starting with '+' are added.
//
// The String parameters are taken by value and moved into the matching
// members. rightMargin_ is fixed at 74, and addChildValues_ and indented_
// start out false.
//
// This change inserts a new `bool emitUTF8` parameter between
// `useSpecialFloats` and `precision` and stores it in emitUTF8_. Because the
// new parameter sits in the middle of the parameter list, every positional
// call site has to be updated to pass it.
BuiltStyledStreamWriter::BuiltStyledStreamWriter (
900
913
String indentation, CommentStyle::Enum cs, String colonSymbol,
901
914
String nullSymbol, String endingLineFeedSymbol, bool useSpecialFloats,
902
- unsigned int precision, PrecisionType precisionType)
915
+ bool emitUTF8, unsigned int precision, PrecisionType precisionType)
903
916
: rightMargin_(74 ), indentation_(std::move(indentation)), cs_(cs),
904
917
colonSymbol_(std::move(colonSymbol)), nullSymbol_(std::move(nullSymbol)),
905
918
endingLineFeedSymbol_(std::move(endingLineFeedSymbol)),
906
919
addChildValues_(false ), indented_(false ),
907
- useSpecialFloats_(useSpecialFloats), precision_(precision ),
908
- precisionType_(precisionType) {}
920
+ useSpecialFloats_(useSpecialFloats), emitUTF8_(emitUTF8 ),
921
+ precision_(precision), precisionType_(precisionType) {}
909
922
int BuiltStyledStreamWriter::write (Value const & root, OStream* sout) {
910
923
sout_ = sout;
911
924
addChildValues_ = false ;
@@ -942,7 +955,8 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
942
955
char const * end;
943
956
bool ok = value.getString (&str, &end);
944
957
if (ok)
945
- pushValue (valueToQuotedStringN (str, static_cast <unsigned >(end - str)));
958
+ pushValue (valueToQuotedStringN (str, static_cast <unsigned >(end - str),
959
+ emitUTF8_));
946
960
else
947
961
pushValue (" " );
948
962
break ;
@@ -966,7 +980,7 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
966
980
Value const & childValue = value[name];
967
981
writeCommentBeforeValue (childValue);
968
982
writeWithIndent (valueToQuotedStringN (
969
- name.data (), static_cast <unsigned >(name.length ())));
983
+ name.data (), static_cast <unsigned >(name.length ()), emitUTF8_ ));
970
984
*sout_ << colonSymbol_;
971
985
writeValue (childValue);
972
986
if (++it == members.end ()) {
@@ -1142,12 +1156,13 @@ StreamWriter::Factory::~Factory() = default;
1142
1156
StreamWriterBuilder::StreamWriterBuilder () { setDefaults (&settings_); }
1143
1157
StreamWriterBuilder::~StreamWriterBuilder () = default ;
1144
1158
StreamWriter* StreamWriterBuilder::newStreamWriter () const {
1145
- String indentation = settings_[" indentation" ].asString ();
1146
- String cs_str = settings_[" commentStyle" ].asString ();
1147
- String pt_str = settings_[" precisionType" ].asString ();
1148
- bool eyc = settings_[" enableYAMLCompatibility" ].asBool ();
1149
- bool dnp = settings_[" dropNullPlaceholders" ].asBool ();
1150
- bool usf = settings_[" useSpecialFloats" ].asBool ();
1159
+ const String indentation = settings_[" indentation" ].asString ();
1160
+ const String cs_str = settings_[" commentStyle" ].asString ();
1161
+ const String pt_str = settings_[" precisionType" ].asString ();
1162
+ const bool eyc = settings_[" enableYAMLCompatibility" ].asBool ();
1163
+ const bool dnp = settings_[" dropNullPlaceholders" ].asBool ();
1164
+ const bool usf = settings_[" useSpecialFloats" ].asBool ();
1165
+ const bool emitUTF8 = settings_[" emitUTF8" ].asBool ();
1151
1166
unsigned int pre = settings_[" precision" ].asUInt ();
1152
1167
CommentStyle::Enum cs = CommentStyle::All;
1153
1168
if (cs_str == " All" ) {
@@ -1179,7 +1194,7 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const {
1179
1194
pre = 17 ;
1180
1195
String endingLineFeedSymbol;
1181
1196
return new BuiltStyledStreamWriter (indentation, cs, colonSymbol, nullSymbol,
1182
- endingLineFeedSymbol, usf, pre,
1197
+ endingLineFeedSymbol, usf, emitUTF8, pre,
1183
1198
precisionType);
1184
1199
}
1185
1200
static void getValidWriterKeys (std::set<String>* valid_keys) {
@@ -1189,6 +1204,7 @@ static void getValidWriterKeys(std::set<String>* valid_keys) {
1189
1204
valid_keys->insert (" enableYAMLCompatibility" );
1190
1205
valid_keys->insert (" dropNullPlaceholders" );
1191
1206
valid_keys->insert (" useSpecialFloats" );
1207
+ valid_keys->insert (" emitUTF8" );
1192
1208
valid_keys->insert (" precision" );
1193
1209
valid_keys->insert (" precisionType" );
1194
1210
}
@@ -1220,6 +1236,7 @@ void StreamWriterBuilder::setDefaults(Json::Value* settings) {
1220
1236
(*settings)[" enableYAMLCompatibility" ] = false ;
1221
1237
(*settings)[" dropNullPlaceholders" ] = false ;
1222
1238
(*settings)[" useSpecialFloats" ] = false ;
1239
+ (*settings)[" emitUTF8" ] = false ;
1223
1240
(*settings)[" precision" ] = 17 ;
1224
1241
(*settings)[" precisionType" ] = " significant" ;
1225
1242
// ! [StreamWriterBuilderDefaults]
0 commit comments