open-source-parsers · TheStormN · May 19, 2020 · May 20, 2020 · May 20, 2020 · May 21, 2020
diff --git a/src/lib_json/json_writer.cpp b/src/lib_json/json_writer.cpp
@@ -309,31 +309,26 @@ static String valueToQuotedStringN(const char* value, unsigned length,
     // Should add a flag to allow this compatibility mode and prevent this
     // sequence from occurring.
     default: {
-      if (emitUTF8) {
-        result += *c;
-      } else {
-        unsigned int codepoint = utf8ToCodepoint(c, end);
-        const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20;
-        const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F;
-        const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000;
-        // don't escape non-control characters
-        // (short escape sequence are applied above)
-        if (FIRST_NON_CONTROL_CODEPOINT <= codepoint &&
-            codepoint <= LAST_NON_CONTROL_CODEPOINT) {
-          result += static_cast<char>(codepoint);
-        } else if (codepoint <
-                   FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic
-                                                     // Multilingual Plane
-          result += "\\u";
-          result += toHex16Bit(codepoint);
-        } else { // codepoint is not in Basic Multilingual Plane
-                 // convert to surrogate pair first
-          codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT;
-          result += "\\u";
-          result += toHex16Bit((codepoint >> 10) + 0xD800);
-          result += "\\u";
-          result += toHex16Bit((codepoint & 0x3FF) + 0xDC00);
+      const auto appendHexChar = [&result](unsigned ch) {
+        result.append("\\u").append(toHex16Bit(ch));
+      };
+
+      unsigned codepoint = static_cast<unsigned>(*c);
+      if (codepoint > 0x7F && !emitUTF8) {
+        codepoint = utf8ToCodepoint(c, end);
+        if (codepoint < 0x10000) {
+          // codepoint is in Basic Multilingual Plane
+          appendHexChar(codepoint);
+        } else {
+          // codepoint is not in Basic Multilingual Plane
+          codepoint -= 0x10000;
+          appendHexChar(0xD800 + (codepoint >> 10));
+          appendHexChar(0xDC00 + (codepoint & 0x3FF));
         }
+      } else if (codepoint < 0x20) {
+        appendHexChar(codepoint);
+      } else {
+        result += static_cast<char>(codepoint);
       }
     } break;
     }

diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp
@@ -2640,6 +2640,30 @@ JSONTEST_FIXTURE_LOCAL(StreamWriterTest, unicode) {
                   "\"\\t\\n\\ud806\\udca1=\\u0133\\ud82c\\udd1b\\uff67\"\n}");
 }
 
+JSONTEST_FIXTURE_LOCAL(StreamWriterTest, controlChars) {
+  // Expected serialization of bytes 0x01-0x1F: \b \t \n \f \r keep their short
+  // escapes; every other control character is a 4-hex-digit Unicode escape.
+  const Json::String expected(
+      "{\n\t\"test\" : "
+      "\"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\"
+      "r\\u000e\\u000f\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017"
+      "\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f\"\n}");
+
+  // Build a Json value whose string holds every control character 0x01-0x1F.
+  Json::Value root;
+  root["test"] = "\x1\x2\x3\x4\x5\x6\x7\b\t\n\xB\f\r\xE\xF\x10\x11\x12\x13\x14"
+                 "\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";
+
+  Json::StreamWriterBuilder b;
+  JSONTEST_ASSERT(Json::writeString(b, root) == expected); // builder defaults
+
+  b.settings_["emitUTF8"] = true; // output must not depend on emitUTF8
+  JSONTEST_ASSERT(Json::writeString(b, root) == expected);
+
+  b.settings_["emitUTF8"] = false; // same output with the setting disabled
+  JSONTEST_ASSERT(Json::writeString(b, root) == expected);
+}
+
 struct ReaderTest : JsonTest::TestCase {
   void setStrictMode() {
     reader = std::unique_ptr<Json::Reader>(