Commit e9e8846d authored by Etienne Bergeron's avatar Etienne Bergeron Committed by Commit Bot

Fix codepoint iteration in ReplaceControlCharactersWithSymbols

The CL is fixing a bug when the rewriting loop iterate through
the input text. If the codepoint is updated with a larger codepoint,
the next_offset was not updated properly and was pointing in the middle
of the surrogate pair. This was not happening since only the PUA
codepoints were producing larger characters.

Change-Id: I4c6eea9a1a02374d04a0271bd4d6140e123069ba
Bug: 4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1903808Reviewed-by: default avatarAlexei Svitkine <asvitkine@chromium.org>
Commit-Queue: Etienne Bergeron <etienneb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#713491}
parent 22c0ccf8
...@@ -316,20 +316,19 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) { ...@@ -316,20 +316,19 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
// Control Pictures block (see: https://unicode.org/charts/PDF/U2400.pdf). // Control Pictures block (see: https://unicode.org/charts/PDF/U2400.pdf).
constexpr base::char16 kSymbolsCodepoint = 0x2400; constexpr base::char16 kSymbolsCodepoint = 0x2400;
size_t next_offset = 0; size_t offset = 0;
while (next_offset < text->length()) { while (offset < text->length()) {
size_t offset = next_offset;
UChar32 codepoint; UChar32 codepoint;
U16_NEXT(text->c_str(), next_offset, text->length(), codepoint); U16_GET(text->c_str(), 0, offset, text->length(), codepoint);
if (codepoint >= 0 && codepoint <= 0x1F) { if (codepoint >= 0 && codepoint <= 0x1F) {
// The newline character should be kept as-is when rendertext is // The newline character should be kept as-is when rendertext is
// multiline. // multiline.
if (codepoint == '\n' && multiline) if (codepoint != '\n' || !multiline) {
continue;
// Replace codepoints with their visual symbols, which are at the same // Replace codepoints with their visual symbols, which are at the same
// offset from kSymbolsCodepoint. // offset from kSymbolsCodepoint.
(*text)[offset] = kSymbolsCodepoint + codepoint; (*text)[offset] = kSymbolsCodepoint + codepoint;
}
} else if (codepoint == 0x7F) { } else if (codepoint == 0x7F) {
// Replace the 'del' codepoint by its symbol (u2421). // Replace the 'del' codepoint by its symbol (u2421).
(*text)[offset] = kSymbolsCodepoint + 0x21; (*text)[offset] = kSymbolsCodepoint + 0x21;
...@@ -349,6 +348,10 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) { ...@@ -349,6 +348,10 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
ReplaceCodepointAtIndex(offset, kReplacementCodepoint, text); ReplaceCodepointAtIndex(offset, kReplacementCodepoint, text);
} }
} }
// Move offset to the index of the next codepoint. This must be computed
// after any rewriting steps above since codepoint size may differ.
U16_NEXT(text->c_str(), offset, text->length(), codepoint);
} }
} }
......
...@@ -4792,6 +4792,11 @@ TEST_F(RenderTextTest, PrivateUseCharacterReplacement) { ...@@ -4792,6 +4792,11 @@ TEST_F(RenderTextTest, PrivateUseCharacterReplacement) {
// a surrogate pair, it needs to be replaced by two characters. // a surrogate pair, it needs to be replaced by two characters.
EXPECT_EQ(WideToUTF16(L"xx\ufffd\ufffda\ufffdz"), EXPECT_EQ(WideToUTF16(L"xx\ufffd\ufffda\ufffdz"),
render_text->GetDisplayText()); render_text->GetDisplayText());
// The private use characters from Area-B must be replaced. The rewrite step
// replaced 2 characters by 1 character.
render_text->SetText(WideToUTF16(L"x\U00100000\U00100001\U00100002"));
EXPECT_EQ(WideToUTF16(L"x\ufffd\ufffd\ufffd"), render_text->GetDisplayText());
} }
TEST_F(RenderTextTest, InvalidSurrogateCharacterReplacement) { TEST_F(RenderTextTest, InvalidSurrogateCharacterReplacement) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment