Skip to content

Fix a parser bug where tokens are misidentified as commas. #1502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/json/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ class JSON_API Reader {
using Errors = std::deque<ErrorInfo>;

bool readToken(Token& token);
bool readTokenSkippingComments(Token& token);
void skipSpaces();
bool match(const Char* pattern, int patternLength);
bool readComment();
Expand Down Expand Up @@ -221,7 +222,6 @@ class JSON_API Reader {
int& column) const;
String getLocationLineAndColumn(Location location) const;
void addComment(Location begin, Location end, CommentPlacement placement);
void skipCommentTokens(Token& token);

static bool containsNewLine(Location begin, Location end);
static String normalizeEOL(Location begin, Location end);
Expand Down
7 changes: 5 additions & 2 deletions src/jsontestrunner/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,14 @@ static int parseCommandLine(int argc, const char* argv[], Options* opts) {
return printUsage(argv);
}
int index = 1;
if (Json::String(argv[index]) == "--json-checker") {
opts->features = Json::Features::strictMode();
if (Json::String(argv[index]) == "--parse-only") {
opts->parseOnly = true;
++index;
}
if (Json::String(argv[index]) == "--strict") {
opts->features = Json::Features::strictMode();
++index;
}
if (Json::String(argv[index]) == "--json-config") {
printConfig();
return 3;
Expand Down
74 changes: 25 additions & 49 deletions src/lib_json/json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,

bool successful = readValue();
Token token;
skipCommentTokens(token);
readTokenSkippingComments(token);
if (collectComments_ && !commentsBefore_.empty())
root.setComment(commentsBefore_, commentAfter);
if (features_.strictRoot_) {
Expand Down Expand Up @@ -157,7 +157,7 @@ bool Reader::readValue() {
throwRuntimeError("Exceeded stackLimit in readValue().");

Token token;
skipCommentTokens(token);
readTokenSkippingComments(token);
bool successful = true;

if (collectComments_ && !commentsBefore_.empty()) {
Expand Down Expand Up @@ -225,14 +225,14 @@ bool Reader::readValue() {
return successful;
}

void Reader::skipCommentTokens(Token& token) {
bool Reader::readTokenSkippingComments(Token& token) {
bool success = readToken(token);
if (features_.allowComments_) {
do {
readToken(token);
} while (token.type_ == tokenComment);
} else {
readToken(token);
while (success && token.type_ == tokenComment) {
success = readToken(token);
}
}
return success;
}

bool Reader::readToken(Token& token) {
Expand Down Expand Up @@ -446,12 +446,7 @@ bool Reader::readObject(Token& token) {
Value init(objectValue);
currentValue().swapPayload(init);
currentValue().setOffsetStart(token.start_ - begin_);
while (readToken(tokenName)) {
bool initialTokenOk = true;
while (tokenName.type_ == tokenComment && initialTokenOk)
initialTokenOk = readToken(tokenName);
if (!initialTokenOk)
break;
while (readTokenSkippingComments(tokenName)) {
if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
return true;
name.clear();
Expand Down Expand Up @@ -480,15 +475,11 @@ bool Reader::readObject(Token& token) {
return recoverFromError(tokenObjectEnd);

Token comma;
if (!readToken(comma) ||
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
comma.type_ != tokenComment)) {
if (!readTokenSkippingComments(comma) ||
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
return addErrorAndRecover("Missing ',' or '}' in object declaration",
comma, tokenObjectEnd);
}
bool finalizeTokenOk = true;
while (comma.type_ == tokenComment && finalizeTokenOk)
finalizeTokenOk = readToken(comma);
if (comma.type_ == tokenObjectEnd)
return true;
}
Expand Down Expand Up @@ -518,10 +509,7 @@ bool Reader::readArray(Token& token) {

Token currentToken;
// Accept Comment after last item in the array.
ok = readToken(currentToken);
while (currentToken.type_ == tokenComment && ok) {
ok = readToken(currentToken);
}
ok = readTokenSkippingComments(currentToken);
bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
currentToken.type_ != tokenArrayEnd);
if (!ok || badTokenType) {
Expand Down Expand Up @@ -943,6 +931,7 @@ class OurReader {
using Errors = std::deque<ErrorInfo>;

bool readToken(Token& token);
bool readTokenSkippingComments(Token& token);
void skipSpaces();
void skipBom(bool skipBom);
bool match(const Char* pattern, int patternLength);
Expand Down Expand Up @@ -976,7 +965,6 @@ class OurReader {
int& column) const;
String getLocationLineAndColumn(Location location) const;
void addComment(Location begin, Location end, CommentPlacement placement);
void skipCommentTokens(Token& token);

static String normalizeEOL(Location begin, Location end);
static bool containsNewLine(Location begin, Location end);
Expand Down Expand Up @@ -1030,7 +1018,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
bool successful = readValue();
nodes_.pop();
Token token;
skipCommentTokens(token);
readTokenSkippingComments(token);
if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
addError("Extra non-whitespace after JSON value.", token);
return false;
Expand Down Expand Up @@ -1058,7 +1046,7 @@ bool OurReader::readValue() {
if (nodes_.size() > features_.stackLimit_)
throwRuntimeError("Exceeded stackLimit in readValue().");
Token token;
skipCommentTokens(token);
readTokenSkippingComments(token);
bool successful = true;

if (collectComments_ && !commentsBefore_.empty()) {
Expand Down Expand Up @@ -1145,14 +1133,14 @@ bool OurReader::readValue() {
return successful;
}

void OurReader::skipCommentTokens(Token& token) {
bool OurReader::readTokenSkippingComments(Token& token) {
bool success = readToken(token);
if (features_.allowComments_) {
do {
readToken(token);
} while (token.type_ == tokenComment);
} else {
readToken(token);
while (success && token.type_ == tokenComment) {
success = readToken(token);
}
}
return success;
}

bool OurReader::readToken(Token& token) {
Expand Down Expand Up @@ -1449,12 +1437,7 @@ bool OurReader::readObject(Token& token) {
Value init(objectValue);
currentValue().swapPayload(init);
currentValue().setOffsetStart(token.start_ - begin_);
while (readToken(tokenName)) {
bool initialTokenOk = true;
while (tokenName.type_ == tokenComment && initialTokenOk)
initialTokenOk = readToken(tokenName);
if (!initialTokenOk)
break;
while (readTokenSkippingComments(tokenName)) {
if (tokenName.type_ == tokenObjectEnd &&
(name.empty() ||
features_.allowTrailingCommas_)) // empty object or trailing comma
Expand Down Expand Up @@ -1491,15 +1474,11 @@ bool OurReader::readObject(Token& token) {
return recoverFromError(tokenObjectEnd);

Token comma;
if (!readToken(comma) ||
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
comma.type_ != tokenComment)) {
if (!readTokenSkippingComments(comma) ||
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
return addErrorAndRecover("Missing ',' or '}' in object declaration",
comma, tokenObjectEnd);
}
bool finalizeTokenOk = true;
while (comma.type_ == tokenComment && finalizeTokenOk)
finalizeTokenOk = readToken(comma);
if (comma.type_ == tokenObjectEnd)
return true;
}
Expand Down Expand Up @@ -1533,10 +1512,7 @@ bool OurReader::readArray(Token& token) {

Token currentToken;
// Accept Comment after last item in the array.
ok = readToken(currentToken);
while (currentToken.type_ == tokenComment && ok) {
ok = readToken(currentToken);
}
ok = readTokenSkippingComments(currentToken);
bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
currentToken.type_ != tokenArrayEnd);
if (!ok || badTokenType) {
Expand Down
4 changes: 4 additions & 0 deletions test/data/fail_strict_comment_01.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"a": "aaa",
"b": "bbb" // comments not allowed in strict mode
}
4 changes: 4 additions & 0 deletions test/data/fail_strict_comment_02.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"a": "aaa", // comments not allowed in strict mode
"b": "bbb"
}
3 changes: 3 additions & 0 deletions test/data/fail_strict_comment_03.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"array" : [1, 2, 3 /* comments not allowed in strict mode */]
}
1 change: 1 addition & 0 deletions test/data/fail_test_object_02.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"one": 1 /* } */ { "two" : 2 }
9 changes: 6 additions & 3 deletions test/runjsontests.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,17 @@ def runAllTests(jsontest_executable_path, input_dir = None,
valgrind_path = use_valgrind and VALGRIND_CMD or ''
for input_path in tests + test_jsonchecker:
expect_failure = os.path.basename(input_path).startswith('fail')
is_json_checker_test = (input_path in test_jsonchecker) or expect_failure
is_json_checker_test = input_path in test_jsonchecker
is_parse_only = is_json_checker_test or expect_failure
is_strict_test = ('_strict_' in os.path.basename(input_path)) or is_json_checker_test
print('TESTING:', input_path, end=' ')
options = is_json_checker_test and '--json-checker' or ''
options = is_parse_only and '--parse-only' or ''
options += is_strict_test and ' --strict' or ''
options += ' --json-writer %s'%writerClass
cmd = '%s%s %s "%s"' % ( valgrind_path, jsontest_executable_path, options,
input_path)
status, process_output = getStatusOutput(cmd)
if is_json_checker_test:
if is_parse_only:
if expect_failure:
if not status:
print('FAILED')
Expand Down
Loading