Commit fb02f0ec authored by Matt Menke, committed by Commit Bot

UnescapeURLComponent: Don't unescape UTF-8 space characters.

Bug: 824715
Change-Id: I71d7f38a2dbe9de6515b8e9d284ab622c2311276
Reviewed-on: https://chromium-review.googlesource.com/1014367
Commit-Queue: Matt Giuca <mgiuca@chromium.org>
Reviewed-by: Matt Giuca <mgiuca@chromium.org>
Cr-Commit-Position: refs/heads/master@{#551260}
parent 0e701c03
...@@ -228,7 +228,28 @@ bool ShouldUnescapeCodePoint(UnescapeRule::Type rules, uint32_t code_point) { ...@@ -228,7 +228,28 @@ bool ShouldUnescapeCodePoint(UnescapeRule::Type rules, uint32_t code_point) {
code_point == 0x1F50F || // LOCK WITH INK PEN (%F0%9F%94%8F) code_point == 0x1F50F || // LOCK WITH INK PEN (%F0%9F%94%8F)
code_point == 0x1F510 || // CLOSED LOCK WITH KEY (%F0%9F%94%90) code_point == 0x1F510 || // CLOSED LOCK WITH KEY (%F0%9F%94%90)
code_point == 0x1F512 || // LOCK (%F0%9F%94%92) code_point == 0x1F512 || // LOCK (%F0%9F%94%92)
code_point == 0x1F513); // OPEN LOCK (%F0%9F%94%93) code_point == 0x1F513 || // OPEN LOCK (%F0%9F%94%93)
// Spaces are also banned, as they can be used to scroll text out of view.
code_point == 0x0085 || // NEXT LINE (%C2%85)
code_point == 0x00A0 || // NO-BREAK SPACE (%C2%A0)
code_point == 0x1680 || // OGHAM SPACE MARK (%E1%9A%80)
code_point == 0x2000 || // EN QUAD (%E2%80%80)
code_point == 0x2001 || // EM QUAD (%E2%80%81)
code_point == 0x2002 || // EN SPACE (%E2%80%82)
code_point == 0x2003 || // EM SPACE (%E2%80%83)
code_point == 0x2004 || // THREE-PER-EM SPACE (%E2%80%84)
code_point == 0x2005 || // FOUR-PER-EM SPACE (%E2%80%85)
code_point == 0x2006 || // SIX-PER-EM SPACE (%E2%80%86)
code_point == 0x2007 || // FIGURE SPACE (%E2%80%87)
code_point == 0x2008 || // PUNCTUATION SPACE (%E2%80%88)
code_point == 0x2009 || // THIN SPACE (%E2%80%89)
code_point == 0x200A || // HAIR SPACE (%E2%80%8A)
code_point == 0x2028 || // LINE SEPARATOR (%E2%80%A8)
code_point == 0x2029 || // PARAGRAPH SEPARATOR (%E2%80%A9)
code_point == 0x202F || // NARROW NO-BREAK SPACE (%E2%80%AF)
code_point == 0x205F || // MEDIUM MATHEMATICAL SPACE (%E2%81%9F)
code_point == 0x3000); // IDEOGRAPHIC SPACE (%E3%80%80)
} }
// Unescapes |escaped_text| according to |rules|, returning the resulting // Unescapes |escaped_text| according to |rules|, returning the resulting
......
...@@ -153,12 +153,12 @@ TEST(EscapeTest, UnescapeURLComponent) { ...@@ -153,12 +153,12 @@ TEST(EscapeTest, UnescapeURLComponent) {
"Some%20random text %25%2dOK"}, "Some%20random text %25%2dOK"},
{"Some%20random text %25%2dOK", UnescapeRule::NORMAL, {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
"Some%20random text %25-OK"}, "Some%20random text %25-OK"},
{"Some%20random text %25%E2%80", UnescapeRule::NORMAL, {"Some%20random text %25%E1%A6", UnescapeRule::NORMAL,
"Some%20random text %25\xE2\x80"}, "Some%20random text %25\xE1\xA6"},
{"Some%20random text %25%E2%80OK", UnescapeRule::NORMAL, {"Some%20random text %25%E1%A6OK", UnescapeRule::NORMAL,
"Some%20random text %25\xE2\x80OK"}, "Some%20random text %25\xE1\xA6OK"},
{"Some%20random text %25%E2%80%84OK", UnescapeRule::NORMAL, {"Some%20random text %25%E1%A6%99OK", UnescapeRule::NORMAL,
"Some%20random text %25\xE2\x80\x84OK"}, "Some%20random text %25\xE1\xA6\x99OK"},
// BiDi Control characters should not be unescaped unless explicitly told // BiDi Control characters should not be unescaped unless explicitly told
// to // to
...@@ -236,6 +236,18 @@ TEST(EscapeTest, UnescapeURLComponent) { ...@@ -236,6 +236,18 @@ TEST(EscapeTest, UnescapeURLComponent) {
UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS, UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
"Some%20random text %25\xF0\x9F\x94\x93OK"}, "Some%20random text %25\xF0\x9F\x94\x93OK"},
// Spaces
{"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)", UnescapeRule::NORMAL,
"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)"},
{"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)", UnescapeRule::NORMAL,
"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)"},
{"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)", UnescapeRule::NORMAL,
"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)"},
{"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)", UnescapeRule::NORMAL,
"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)"},
{"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)", UnescapeRule::NORMAL,
"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)"},
// Two spoofing characters in a row should not be unescaped. // Two spoofing characters in a row should not be unescaped.
{"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"}, {"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"},
// Non-spoofing characters surrounded by spoofing characters should be // Non-spoofing characters surrounded by spoofing characters should be
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment