Skip to content

Commit 1ffdbaf

Browse files
authored
Add Json configuration flag to allow comments (#2592)
in C/Java style for both string and stream parser. This flag, together with allowTrailingCommas and isLenient, will help to cover most use-cases for Json5, for example, configuration files. Fixes #2221 Fixes #797
1 parent f242bb5 commit 1ffdbaf

File tree

16 files changed

+588
-83
lines changed

16 files changed

+588
-83
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
5+
package kotlinx.benchmarks.json
6+
7+
import kotlinx.benchmarks.model.*
8+
import kotlinx.serialization.json.*
9+
import org.openjdk.jmh.annotations.*
10+
import java.io.*
11+
import java.util.concurrent.*
12+
13+
@Warmup(iterations = 7, time = 1)
14+
@Measurement(iterations = 7, time = 1)
15+
@BenchmarkMode(Mode.Throughput)
16+
@OutputTimeUnit(TimeUnit.SECONDS)
17+
@State(Scope.Benchmark)
18+
@Fork(2)
19+
open class TwitterFeedCommentsBenchmark {
20+
val inputBytes = TwitterFeedBenchmark::class.java.getResource("/twitter_macro.json").readBytes()
21+
private val input = inputBytes.decodeToString()
22+
private val inputWithComments = prepareInputWithComments(input)
23+
private val inputWithCommentsBytes = inputWithComments.encodeToByteArray()
24+
25+
private val jsonComments = Json { ignoreUnknownKeys = true; allowComments = true; }
26+
private val jsonNoComments = Json { ignoreUnknownKeys = true; allowComments = false; }
27+
28+
fun prepareInputWithComments(inp: String): String {
29+
val result = inp.lineSequence().map { s ->
30+
// "id", "in_...", "is_...", etc
31+
if (!s.trimStart().startsWith("\"i")) s else "$s // json comment"
32+
}.joinToString("\n")
33+
assert(result.contains("// json comment"))
34+
return result
35+
}
36+
37+
@Setup
38+
fun init() {
39+
// Explicitly invoking both variants before benchmarking so we know that both parser implementation classes are loaded
40+
require("foobar" == jsonComments.decodeFromString<String>("\"foobar\""))
41+
require("foobar" == jsonNoComments.decodeFromString<String>("\"foobar\""))
42+
}
43+
44+
// The difference with TwitterFeedBenchmark.decodeMicroTwitter shows if we slow down when both StringJsonLexer and CommentsJsonLexer
45+
// are loaded by JVM. Should be almost non-existent on modern JVMs (but on OpenJDK-Corretto-11.0.14.1 there is one. 17 is fine.)
46+
@Benchmark
47+
fun decodeMicroTwitter() = jsonNoComments.decodeFromString(MicroTwitterFeed.serializer(), input)
48+
49+
// The difference with this.decodeMicroTwitter shows if we slow down when comments are enabled but no comments present
50+
// in the input. It is around 13% slower than without comments support, mainly because skipWhitespaces is a separate function
51+
// that sometimes is not inlined by JIT.
52+
@Benchmark
53+
fun decodeMicroTwitterCommentSupport() = jsonComments.decodeFromString(MicroTwitterFeed.serializer(), input)
54+
55+
// Shows how much actual skipping of the comments takes: around 10%.
56+
@Benchmark
57+
fun decodeMicroTwitterCommentInData() = jsonComments.decodeFromString(MicroTwitterFeed.serializer(), inputWithComments)
58+
59+
@Benchmark
60+
fun decodeMicroTwitterCommentSupportStream() = jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), ByteArrayInputStream(inputBytes))
61+
62+
@Benchmark
63+
fun decodeMicroTwitterCommentInDataStream() = jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), ByteArrayInputStream(inputWithCommentsBytes))
64+
}

benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedStreamBenchmark.kt

+9
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ open class TwitterFeedStreamBenchmark {
3030
jacksonObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
3131

3232

33+
@Setup
34+
fun init() {
35+
// Explicitly invoking decodeFromStream before benchmarking so we know that both parser implementation classes are loaded
36+
require("foobar" == Json.decodeFromStream<String>(ByteArrayInputStream("\"foobar\"".encodeToByteArray())))
37+
require("foobar" == Json.decodeFromString<String>("\"foobar\""))
38+
}
39+
40+
3341
private val inputStream: InputStream
3442
get() = ByteArrayInputStream(bytes)
3543
private val outputStream: OutputStream
@@ -59,6 +67,7 @@ open class TwitterFeedStreamBenchmark {
5967
}
6068
}
6169

70+
// Difference with TwitterFeedBenchmark.decodeMicroTwitter shows how heavy Java's standard UTF-8 decoding actually is.
6271
@Benchmark
6372
fun decodeMicroTwitterReadText(): MicroTwitterFeed {
6473
return inputStream.use {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
* Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
5+
package kotlinx.serialization.features
6+
7+
import kotlinx.serialization.*
8+
import kotlinx.serialization.json.*
9+
import kotlin.test.*
10+
11+
class JsonCommentsTest: JsonTestBase() {
12+
val json = Json(default) {
13+
allowComments = true
14+
allowTrailingComma = true
15+
}
16+
17+
val withLenient = Json(json) {
18+
isLenient = true
19+
ignoreUnknownKeys = true
20+
}
21+
22+
@Test
23+
fun testBasic() = parametrizedTest { mode ->
24+
val inputBlock = """{"data": "b" /*value b*/ }"""
25+
val inputLine = "{\"data\": \"b\" // value b \n }"
26+
assertEquals(StringData("b"), json.decodeFromString(inputBlock, mode))
27+
assertEquals(StringData("b"), json.decodeFromString(inputLine, mode))
28+
}
29+
30+
@Serializable
31+
data class Target(val key: String, val key2: List<Int>, val key3: NestedTarget, val key4: String)
32+
33+
@Serializable
34+
data class NestedTarget(val nestedKey: String)
35+
36+
private fun target(key4: String): Target = Target("value", listOf(1, 2), NestedTarget("foo"), key4)
37+
38+
@Test
39+
fun testAllBlocks() = parametrizedTest { mode ->
40+
val input = """{ /*beginning*/
41+
/*before key*/ "key" /*after key*/ : /*after colon*/ "value" /*before comma*/,
42+
"key2": [ /*array1*/ 1, /*array2*/ 2, /*end array*/],
43+
"key3": { /*nested obj*/ "nestedKey": "foo"} /*after nested*/,
44+
"key4": "/*comment inside quotes is a part of value*/",
45+
/*before end*/
46+
}"""
47+
assertEquals(target("/*comment inside quotes is a part of value*/"), json.decodeFromString(input, mode))
48+
}
49+
50+
@Test
51+
fun testAllLines() = parametrizedTest { mode ->
52+
val input = """{ //beginning
53+
//before key
54+
"key" // after key
55+
: // after colon
56+
"value" //before comma
57+
,
58+
"key2": [ //array1
59+
1, //array2
60+
2, //end array
61+
],
62+
"key3": { //nested obj
63+
"nestedKey": "foo"
64+
} , //after nested
65+
"key4": "//comment inside quotes is a part of value",
66+
//before end
67+
}"""
68+
assertEquals(target("//comment inside quotes is a part of value"), json.decodeFromString(input, mode))
69+
}
70+
71+
@Test
72+
fun testMixed() = parametrizedTest { mode ->
73+
val input = """{ // begin
74+
"key": "value", // after
75+
"key2": /* array */ /*another comment */ [1, 2],
76+
"key3": /* //this is a block comment */ { "nestedKey": // /*this is a line comment*/ "bar"
77+
"foo" },
78+
"key4": /* nesting block comments /* not supported */ "*/"
79+
/* end */}"""
80+
assertEquals(target("*/"), json.decodeFromString(input, mode))
81+
}
82+
83+
@Test
84+
fun testWeirdKeys() {
85+
val map = mapOf(
86+
"// comment inside quotes is a part of key" to "/* comment inside quotes is a part of value */",
87+
"/*key */" to "/* value",
88+
"/* key" to "*/ value"
89+
)
90+
val input = """/* before begin */
91+
{
92+
${map.entries.joinToString(separator = ",\n") { (k, v) -> "\"$k\" : \"$v\"" }}
93+
} // after end
94+
""".trimIndent()
95+
val afterMap = json.parseToJsonElement(input).jsonObject.mapValues { (_, v) ->
96+
v as JsonPrimitive
97+
assertTrue(v.isString)
98+
v.content
99+
}
100+
assertEquals(map, afterMap)
101+
}
102+
103+
@Test
104+
fun testWithLenient() = parametrizedTest { mode ->
105+
val input = """{ //beginning
106+
//before key
107+
key // after key
108+
: // after colon
109+
value //before comma
110+
,
111+
key2: [ //array1
112+
1, //array2
113+
2, //end array
114+
],
115+
key3: { //nested obj
116+
nestedKey: "foo"
117+
} , //after nested
118+
key4: value//comment_cannot_break_value_apart,
119+
key5: //comment without quotes where new token expected is still a comment
120+
value5,
121+
//before end
122+
}"""
123+
assertEquals(target("value//comment_cannot_break_value_apart"), withLenient.decodeFromString(input, mode))
124+
}
125+
126+
@Test
127+
fun testUnclosedCommentsErrorMsg() = parametrizedTest { mode ->
128+
val input = """{"data": "x"} // no newline"""
129+
assertEquals(StringData("x"), json.decodeFromString<StringData>(input, mode))
130+
val input2 = """{"data": "x"} /* no endblock"""
131+
assertFailsWith<SerializationException>("Expected end of the block comment: \"*/\", but had EOF instead at path: \$") {
132+
json.decodeFromString<StringData>(input2, mode)
133+
}
134+
}
135+
136+
private val lexerBatchSize = 16 * 1024
137+
138+
@Test
139+
fun testVeryLargeComments() = parametrizedTest { mode ->
140+
val strLen = lexerBatchSize * 2 + 42
141+
val inputLine = """{"data": //a""" + "a".repeat(strLen) + "\n\"x\"}"
142+
assertEquals(StringData("x"), json.decodeFromString<StringData>(inputLine, mode))
143+
val inputBlock = """{"data": /*a""" + "a".repeat(strLen) + "*/\"x\"}"
144+
assertEquals(StringData("x"), json.decodeFromString<StringData>(inputBlock, mode))
145+
}
146+
147+
@Test
148+
fun testCommentsOnThresholdEdge() = parametrizedTest { mode ->
149+
val inputPrefix = """{"data": /*a"""
150+
// Here, we test the situation when closing */ is divided in buffer:
151+
// * fits in the initial buffer, but / is not.
152+
// E.g. situation with batches looks like this: ['{', '"', 'd', ..., '*'], ['/', ...]
153+
val bloatSize = lexerBatchSize - inputPrefix.length - 1
154+
val inputLine = inputPrefix + "a".repeat(bloatSize) + "*/\"x\"}"
155+
assertEquals(StringData("x"), json.decodeFromString<StringData>(inputLine, mode))
156+
157+
// Test when * is unclosed and last in buffer:
158+
val inputLine2 = inputPrefix + "a".repeat(bloatSize) + "*"
159+
assertFailsWith<SerializationException>("Expected end of the block comment: \"*/\", but had EOF instead at path: \$") {
160+
json.decodeFromString<StringData>(inputLine2, mode)
161+
}
162+
163+
}
164+
165+
}

formats/json-tests/jvmTest/resources/spec_cases/listing.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -228,4 +228,4 @@ y_structure_lonely_true.json
228228
y_structure_string_empty.json
229229
y_structure_trailing_newline.json
230230
y_structure_true_in_array.json
231-
y_structure_whitespace_array.json
231+
y_structure_whitespace_array.json
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Non-spec inputs that we accept with allowComments = true setting:
2+
n_object_trailing_comment.json
3+
n_object_trailing_comment_slash_open.json
4+
n_structure_object_with_comment.json
Binary file not shown.

formats/json/api/kotlinx-serialization-json.api

+3
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ public final class kotlinx/serialization/json/JsonArraySerializer : kotlinx/seri
9494
}
9595

9696
public final class kotlinx/serialization/json/JsonBuilder {
97+
public final fun getAllowComments ()Z
9798
public final fun getAllowSpecialFloatingPointValues ()Z
9899
public final fun getAllowStructuredMapKeys ()Z
99100
public final fun getAllowTrailingComma ()Z
@@ -111,6 +112,7 @@ public final class kotlinx/serialization/json/JsonBuilder {
111112
public final fun getUseAlternativeNames ()Z
112113
public final fun getUseArrayPolymorphism ()Z
113114
public final fun isLenient ()Z
115+
public final fun setAllowComments (Z)V
114116
public final fun setAllowSpecialFloatingPointValues (Z)V
115117
public final fun setAllowStructuredMapKeys (Z)V
116118
public final fun setAllowTrailingComma (Z)V
@@ -141,6 +143,7 @@ public synthetic class kotlinx/serialization/json/JsonClassDiscriminator$Impl :
141143

142144
public final class kotlinx/serialization/json/JsonConfiguration {
143145
public fun <init> ()V
146+
public final fun getAllowComments ()Z
144147
public final fun getAllowSpecialFloatingPointValues ()Z
145148
public final fun getAllowStructuredMapKeys ()Z
146149
public final fun getAllowTrailingComma ()Z

formats/json/commonMain/src/kotlinx/serialization/json/Json.kt

+19-2
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,13 @@ public sealed class Json(
102102
* @throws [IllegalArgumentException] if the decoded input cannot be represented as a valid instance of type [T]
103103
*/
104104
public final override fun <T> decodeFromString(deserializer: DeserializationStrategy<T>, @FormatLanguage("json", "", "") string: String): T {
105-
val lexer = StringJsonLexer(string)
105+
val lexer = StringJsonLexer(this, string)
106106
val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer, deserializer.descriptor, null)
107107
val result = input.decodeSerializableValue(deserializer)
108108
lexer.expectEof()
109109
return result
110110
}
111+
111112
/**
112113
* Serializes the given [value] into an equivalent [JsonElement] using the given [serializer]
113114
*
@@ -385,6 +386,22 @@ public class JsonBuilder internal constructor(json: Json) {
385386
@ExperimentalSerializationApi
386387
public var allowTrailingComma: Boolean = json.configuration.allowTrailingComma
387388

389+
/**
390+
* Allows parser to accept C/Java-style comments in JSON input.
391+
*
392+
* Comments are being skipped and are not stored anywhere; this setting does not affect encoding in any way.
393+
*
394+
* More specifically, a comment is a substring that is not a part of JSON key or value, conforming to one of those:
395+
*
396+
* 1. Starts with `//` characters and ends with a newline character `\n`.
397+
* 2. Starts with `/*` characters and ends with `*/` characters. Nesting block comments
398+
* is not supported: no matter how many `/*` characters you have, first `*/` will end the comment.
399+
*
400+
* `false` by default.
401+
*/
402+
@ExperimentalSerializationApi
403+
public var allowComments: Boolean = json.configuration.allowComments
404+
388405
/**
389406
* Module with contextual and polymorphic serializers to be used in the resulting [Json] instance.
390407
*
@@ -422,7 +439,7 @@ public class JsonBuilder internal constructor(json: Json) {
422439
allowStructuredMapKeys, prettyPrint, explicitNulls, prettyPrintIndent,
423440
coerceInputValues, useArrayPolymorphism,
424441
classDiscriminator, allowSpecialFloatingPointValues, useAlternativeNames,
425-
namingStrategy, decodeEnumsCaseInsensitive, allowTrailingComma, classDiscriminatorMode
442+
namingStrategy, decodeEnumsCaseInsensitive, allowTrailingComma, allowComments, classDiscriminatorMode
426443
)
427444
}
428445
}

formats/json/commonMain/src/kotlinx/serialization/json/JsonConfiguration.kt

+3-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ public class JsonConfiguration @OptIn(ExperimentalSerializationApi::class) inter
3838
@ExperimentalSerializationApi
3939
public val allowTrailingComma: Boolean = false,
4040
@ExperimentalSerializationApi
41+
public val allowComments: Boolean = false,
42+
@ExperimentalSerializationApi
4143
public var classDiscriminatorMode: ClassDiscriminatorMode = ClassDiscriminatorMode.POLYMORPHIC,
4244
) {
4345

@@ -49,7 +51,7 @@ public class JsonConfiguration @OptIn(ExperimentalSerializationApi::class) inter
4951
"prettyPrintIndent='$prettyPrintIndent', coerceInputValues=$coerceInputValues, useArrayPolymorphism=$useArrayPolymorphism, " +
5052
"classDiscriminator='$classDiscriminator', allowSpecialFloatingPointValues=$allowSpecialFloatingPointValues, " +
5153
"useAlternativeNames=$useAlternativeNames, namingStrategy=$namingStrategy, decodeEnumsCaseInsensitive=$decodeEnumsCaseInsensitive, " +
52-
"allowTrailingComma=$allowTrailingComma, classDiscriminatorMode=$classDiscriminatorMode)"
54+
"allowTrailingComma=$allowTrailingComma, allowComments=$allowComments, classDiscriminatorMode=$classDiscriminatorMode)"
5355
}
5456
}
5557

formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStreams.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public fun <T> decodeByReader(
3737
deserializer: DeserializationStrategy<T>,
3838
reader: InternalJsonReader
3939
): T {
40-
val lexer = ReaderJsonLexer(reader)
40+
val lexer = ReaderJsonLexer(json, reader)
4141
try {
4242
val input = StreamingJsonDecoder(json, WriteMode.OBJ, lexer, deserializer.descriptor, null)
4343
val result = input.decodeSerializableValue(deserializer)
@@ -56,7 +56,7 @@ public fun <T> decodeToSequenceByReader(
5656
deserializer: DeserializationStrategy<T>,
5757
format: DecodeSequenceMode = DecodeSequenceMode.AUTO_DETECT
5858
): Sequence<T> {
59-
val lexer = ReaderJsonLexer(reader, CharArray(BATCH_SIZE)) // Unpooled buffer due to lazy nature of sequence
59+
val lexer = ReaderJsonLexer(json, reader, CharArray(BATCH_SIZE)) // Unpooled buffer due to lazy nature of sequence
6060
val iter = JsonIterator(format, json, lexer, deserializer)
6161
return Sequence { iter }.constrainOnce()
6262
}

formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ public fun <T> decodeStringToJsonTree(
359359
deserializer: DeserializationStrategy<T>,
360360
source: String
361361
): JsonElement {
362-
val lexer = StringJsonLexer(source)
362+
val lexer = StringJsonLexer(json, source)
363363
val input = StreamingJsonDecoder(json, WriteMode.OBJ, lexer, deserializer.descriptor, null)
364364
val tree = input.decodeJsonElement()
365365
lexer.expectEof()

formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt

+6-3
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,12 @@ private sealed class AbstractJsonTreeDecoder(
154154
return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' literal when non-nullable $type was expected")
155155
}
156156

157-
override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder =
158-
if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(StringJsonLexer(getPrimitiveValue(tag).content), json)
159-
else super.decodeTaggedInline(tag, inlineDescriptor)
157+
override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder {
158+
return if (inlineDescriptor.isUnsignedNumber) {
159+
val lexer = StringJsonLexer(json, getPrimitiveValue(tag).content)
160+
JsonDecoderForUnsignedTypes(lexer, json)
161+
} else super.decodeTaggedInline(tag, inlineDescriptor)
162+
}
160163

161164
override fun decodeInline(descriptor: SerialDescriptor): Decoder {
162165
return if (currentTagOrNull != null) super.decodeInline(descriptor)

0 commit comments

Comments
 (0)