Commit fc9e3f89 authored by Etienne Bergeron's avatar Etienne Bergeron Committed by Commit Bot

Rewrite invalid surrogate and non-character codepoints

This CL replaces invalid codepoints (lone surrogates and non-characters)
with the replacement character (U+FFFD).

See https://unicode.org/reports/tr44/
"""
  Noncharacter_Code_Point	: Code points permanently reserved for internal use.
"""


Bug: 1011818
Change-Id: Id39b42f52a8c36887fabcd81e794f46ebedec089
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1902772
Reviewed-by: Alexei Svitkine <asvitkine@chromium.org>
Reviewed-by: Robert Liao <robliao@chromium.org>
Commit-Queue: Etienne Bergeron <etienneb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#713486}
parent d290a035
......@@ -333,6 +333,13 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
} else if (codepoint == 0x7F) {
// Replace the 'del' codepoint by its symbol (u2421).
(*text)[offset] = kSymbolsCodepoint + 0x21;
} else if (!U_IS_UNICODE_CHAR(codepoint)) {
// Unicode codepoint that can't be assigned a character. This handles:
// - single surrogate codepoints,
// - last two codepoints on each plane,
// - invalid characters (e.g. u+fdd0..u+fdef)
// - codepoints above u+10ffff
ReplaceCodepointAtIndex(offset, kReplacementCodepoint, text);
} else if (codepoint > 0x7F) {
// Private use codepoints are working with a pair of font and codepoint,
// but they are not used in Chrome.
......
......@@ -863,14 +863,15 @@ TEST_F(RenderTextTest, RevealObscuredText) {
render_text->SetText(UTF8ToUTF16("new longer"));
EXPECT_EQ(GetObscuredString(10), render_text->GetDisplayText());
// Text with invalid surrogates.
// Text with invalid surrogates (surrogates low 0xDC00 and high 0xD800).
// Invalid surrogates are replaced by replacement character (e.g. 0xFFFD).
const base::char16 invalid_surrogates[] = {0xDC00, 0xD800, 'h', 'o', 'p', 0};
render_text->SetText(invalid_surrogates);
EXPECT_EQ(GetObscuredString(5), render_text->GetDisplayText());
render_text->RenderText::SetObscuredRevealIndex(0);
EXPECT_EQ(GetObscuredString(5, 0, 0xDC00), render_text->GetDisplayText());
EXPECT_EQ(GetObscuredString(5, 0, 0xFFFD), render_text->GetDisplayText());
render_text->RenderText::SetObscuredRevealIndex(1);
EXPECT_EQ(GetObscuredString(5, 1, 0xD800), render_text->GetDisplayText());
EXPECT_EQ(GetObscuredString(5, 1, 0xFFFD), render_text->GetDisplayText());
render_text->RenderText::SetObscuredRevealIndex(2);
EXPECT_EQ(GetObscuredString(5, 2, 'h'), render_text->GetDisplayText());
......@@ -4785,7 +4786,7 @@ TEST_F(RenderTextTest, ControlCharacterReplacement) {
TEST_F(RenderTextTest, PrivateUseCharacterReplacement) {
RenderText* render_text = GetRenderText();
render_text->SetText(UTF8ToUTF16("xx\ue78d\ue78fa\U00100042z"));
render_text->SetText(WideToUTF16(L"xx\ue78d\ue78fa\U00100042z"));
// The private use characters should have been replaced. If the code point is
// a surrogate pair, it needs to be replaced by two characters.
......@@ -4793,6 +4794,13 @@ TEST_F(RenderTextTest, PrivateUseCharacterReplacement) {
render_text->GetDisplayText());
}
// Verifies that unpaired surrogates in the text are shown as the replacement
// character (U+FFFD) in the display text.
TEST_F(RenderTextTest, InvalidSurrogateCharacterReplacement) {
  // A lone low surrogate (0xDC00) followed by a lone high surrogate (0xD800):
  // the two are in the wrong order, so they do not form a valid pair.
  //
  // The input is built from raw UTF-16 code units rather than through
  // WideToUTF16(): where wchar_t is 32 bits wide, that conversion may itself
  // sanitize lone surrogates, which would let this test pass without
  // exercising RenderText's own replacement logic.
  const base::char16 invalid_surrogates[] = {0xDC00, 0xD800, 0};
  RenderText* render_text = GetRenderText();
  render_text->SetText(invalid_surrogates);
  // Each invalid code unit is replaced individually, so two are expected.
  EXPECT_EQ(WideToUTF16(L"\ufffd\ufffd"), render_text->GetDisplayText());
}
// Make sure the horizontal positions of runs in a line (left-to-right for
// LTR languages and right-to-left for RTL languages).
TEST_F(RenderTextTest, HarfBuzz_HorizontalPositions) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment