Commit 21fb68f9 authored by Shengfa Lin, committed by Commit Bot

[chromium] Replace lone low surrogate in json parser

With the JSON_REPLACE_INVALID_CHARACTERS option, the JSON parser
replaces a lone high surrogate but not a lone low surrogate.
This change makes it replace a lone low surrogate as well.

Tests:

    autoninja -C out/Default content_shell base base_unittests services services_unittests
    out/Default/base_unittests --gtest_filter="JSONParserTest.*:JSONReaderTest.*:JSONStringEscapeTest.*:StringUtilTest.IsStringUTF8"

Bug: chromedriver:3515
Change-Id: I547b8b7edaa24e7de79dc980746815de607a0918
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2248342
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Reviewed-by: Robert Sesek <rsesek@chromium.org>
Commit-Queue: Shengfa Lin <shengfa@google.com>
Cr-Commit-Position: refs/heads/master@{#779553}
parent c6f5e719
...@@ -589,10 +589,13 @@ bool JSONParser::DecodeUTF16(uint32_t* out_code_point) { ...@@ -589,10 +589,13 @@ bool JSONParser::DecodeUTF16(uint32_t* out_code_point) {
// If this is a high surrogate, consume the next code unit to get the // If this is a high surrogate, consume the next code unit to get the
// low surrogate. // low surrogate.
if (CBU16_IS_SURROGATE(code_unit16_high)) { if (CBU16_IS_SURROGATE(code_unit16_high)) {
// Make sure this is the high surrogate. If not, it's an encoding // Make sure this is the high surrogate.
// error. if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
return false; return false;
*out_code_point = kUnicodeReplacementPoint;
return true;
}
// Make sure that the token has more characters to consume the // Make sure that the token has more characters to consume the
// lower surrogate. // lower surrogate.
......
...@@ -833,10 +833,18 @@ TEST(JSONReaderTest, DecodeNegativeEscapeSequence) { ...@@ -833,10 +833,18 @@ TEST(JSONReaderTest, DecodeNegativeEscapeSequence) {
// Verifies invalid code points are replaced. // Verifies invalid code points are replaced.
TEST(JSONReaderTest, ReplaceInvalidCharacters) { TEST(JSONReaderTest, ReplaceInvalidCharacters) {
// U+D800 is a lone surrogate. // U+D800 is a lone high surrogate.
const std::string invalid = "\"\xED\xA0\x80\""; const std::string invalid_high = "\"\xED\xA0\x80\"";
Optional<Value> value = Optional<Value> value =
JSONReader::Read(invalid, JSON_REPLACE_INVALID_CHARACTERS); JSONReader::Read(invalid_high, JSON_REPLACE_INVALID_CHARACTERS);
ASSERT_TRUE(value);
ASSERT_TRUE(value->is_string());
// Expect three U+FFFD (one for each UTF-8 byte in the invalid code point).
EXPECT_EQ("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", value->GetString());
// U+DFFF is a lone low surrogate.
const std::string invalid_low = "\"\xED\xBF\xBF\"";
value = JSONReader::Read(invalid_low, JSON_REPLACE_INVALID_CHARACTERS);
ASSERT_TRUE(value); ASSERT_TRUE(value);
ASSERT_TRUE(value->is_string()); ASSERT_TRUE(value->is_string());
// Expect three U+FFFD (one for each UTF-8 byte in the invalid code point). // Expect three U+FFFD (one for each UTF-8 byte in the invalid code point).
...@@ -844,10 +852,17 @@ TEST(JSONReaderTest, ReplaceInvalidCharacters) { ...@@ -844,10 +852,17 @@ TEST(JSONReaderTest, ReplaceInvalidCharacters) {
} }
TEST(JSONReaderTest, ReplaceInvalidUTF16EscapeSequence) { TEST(JSONReaderTest, ReplaceInvalidUTF16EscapeSequence) {
// U+D800 is a lone surrogate. // U+D800 is a lone high surrogate.
const std::string invalid = "\"_\\uD800_\""; const std::string invalid_high = "\"_\\uD800_\"";
Optional<Value> value = Optional<Value> value =
JSONReader::Read(invalid, JSON_REPLACE_INVALID_CHARACTERS); JSONReader::Read(invalid_high, JSON_REPLACE_INVALID_CHARACTERS);
ASSERT_TRUE(value);
ASSERT_TRUE(value->is_string());
EXPECT_EQ("_\xEF\xBF\xBD_", value->GetString());
// U+DFFF is a lone low surrogate.
const std::string invalid_low = "\"_\\uDFFF_\"";
value = JSONReader::Read(invalid_low, JSON_REPLACE_INVALID_CHARACTERS);
ASSERT_TRUE(value); ASSERT_TRUE(value);
ASSERT_TRUE(value->is_string()); ASSERT_TRUE(value->is_string());
EXPECT_EQ("_\xEF\xBF\xBD_", value->GetString()); EXPECT_EQ("_\xEF\xBF\xBD_", value->GetString());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment