diff --git a/src/lib_json/json_writer.cpp b/src/lib_json/json_writer.cpp index 56ee65ef6..a3be0bb04 100644 --- a/src/lib_json/json_writer.cpp +++ b/src/lib_json/json_writer.cpp @@ -309,31 +309,31 @@ static String valueToQuotedStringN(const char* value, unsigned length, // Should add a flag to allow this compatibility mode and prevent this // sequence from occurring. default: { - if (emitUTF8) { - result += *c; - } else { - unsigned int codepoint = utf8ToCodepoint(c, end); - const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20; - const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F; - const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000; - // don't escape non-control characters - // (short escape sequence are applied above) - if (FIRST_NON_CONTROL_CODEPOINT <= codepoint && - codepoint <= LAST_NON_CONTROL_CODEPOINT) { - result += static_cast<char>(codepoint); - } else if (codepoint < - FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic - // Multilingual Plane - result += "\\u"; - result += toHex16Bit(codepoint); - } else { // codepoint is not in Basic Multilingual Plane - // convert to surrogate pair first + const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20; + const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F; + const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000; + + const auto appendHexChar = [&result](unsigned ch) { + result.append("\\u").append(toHex16Bit(ch)); + }; + + unsigned int codepoint = static_cast<unsigned char>(*c); + if (codepoint > LAST_NON_CONTROL_CODEPOINT && !emitUTF8) { + codepoint = utf8ToCodepoint(c, end); + if (codepoint < FIRST_SURROGATE_PAIR_CODEPOINT) { + // codepoint is in Basic Multilingual Plane + appendHexChar(codepoint); + } else { + // codepoint is not in Basic Multilingual Plane + // convert to surrogate pair first codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT; - result += "\\u"; - result += toHex16Bit((codepoint >> 10) + 0xD800); - result += "\\u"; - result += toHex16Bit((codepoint & 0x3FF) + 0xDC00); + appendHexChar(0xD800 + 
(codepoint >> 10)); + appendHexChar(0xDC00 + (codepoint & 0x3FF)); } + } else if (codepoint < FIRST_NON_CONTROL_CODEPOINT) { + appendHexChar(codepoint); + } else { + result += static_cast<char>(codepoint); } } break; } diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp index 73850cfd8..27c340378 100644 --- a/src/test_lib_json/main.cpp +++ b/src/test_lib_json/main.cpp @@ -2640,6 +2640,30 @@ JSONTEST_FIXTURE_LOCAL(StreamWriterTest, unicode) { "\"\\t\\n\\ud806\\udca1=\\u0133\\ud82c\\udd1b\\uff67\"\n}"); } +JSONTEST_FIXTURE_LOCAL(StreamWriterTest, controlChars) { + // Create a Json value containing UTF-8 string with some chars that need + // escape (tab,newline,control chars). + const Json::String expected( + "{\n\t\"test\" : " + "\"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\" + "r\\u000e\\u000f\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017" + "\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f\"\n}"); + + // Create a Json value containing string with controls chars that need escape. + Json::Value root; + root["test"] = "\x1\x2\x3\x4\x5\x6\x7\b\t\n\xB\f\r\xE\xF\x10\x11\x12\x13\x14" + "\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; + + Json::StreamWriterBuilder b; + JSONTEST_ASSERT(Json::writeString(b, root) == expected); + + b.settings_["emitUTF8"] = true; + JSONTEST_ASSERT(Json::writeString(b, root) == expected); + + b.settings_["emitUTF8"] = false; + JSONTEST_ASSERT(Json::writeString(b, root) == expected); +} + struct ReaderTest : JsonTest::TestCase { void setStrictMode() { reader = std::unique_ptr<Json::Reader>(