Commit e9e8846d authored by Etienne Bergeron's avatar Etienne Bergeron Committed by Commit Bot

Fix codepoint iteration in ReplaceControlCharactersWithSymbols

The CL is fixing a bug when the rewriting loop iterate through
the input text. If the codepoint is updated with a larger codepoint,
the next_offset was not updated properly and was pointing in the middle
of the surrogate pair. This was not happening since only the PUA
codepoints were producing larger characters.

Change-Id: I4c6eea9a1a02374d04a0271bd4d6140e123069ba
Bug: 4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1903808Reviewed-by: default avatarAlexei Svitkine <asvitkine@chromium.org>
Commit-Queue: Etienne Bergeron <etienneb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#713491}
parent 22c0ccf8
......@@ -316,20 +316,19 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
// Control Pictures block (see: https://unicode.org/charts/PDF/U2400.pdf).
constexpr base::char16 kSymbolsCodepoint = 0x2400;
size_t next_offset = 0;
while (next_offset < text->length()) {
size_t offset = next_offset;
size_t offset = 0;
while (offset < text->length()) {
UChar32 codepoint;
U16_NEXT(text->c_str(), next_offset, text->length(), codepoint);
U16_GET(text->c_str(), 0, offset, text->length(), codepoint);
if (codepoint >= 0 && codepoint <= 0x1F) {
// The newline character should be kept as-is when rendertext is
// multiline.
if (codepoint == '\n' && multiline)
continue;
// Replace codepoints with their visual symbols, which are at the same
// offset from kSymbolsCodepoint.
(*text)[offset] = kSymbolsCodepoint + codepoint;
if (codepoint != '\n' || !multiline) {
// Replace codepoints with their visual symbols, which are at the same
// offset from kSymbolsCodepoint.
(*text)[offset] = kSymbolsCodepoint + codepoint;
}
} else if (codepoint == 0x7F) {
// Replace the 'del' codepoint by its symbol (u2421).
(*text)[offset] = kSymbolsCodepoint + 0x21;
......@@ -349,6 +348,10 @@ void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
ReplaceCodepointAtIndex(offset, kReplacementCodepoint, text);
}
}
// Move offset to the index of the next codepoint. This must be computed
// after any rewriting steps above since codepoint size may differ.
U16_NEXT(text->c_str(), offset, text->length(), codepoint);
}
}
......
......@@ -4792,6 +4792,11 @@ TEST_F(RenderTextTest, PrivateUseCharacterReplacement) {
// a surrogate pair, it needs to be replaced by two characters.
EXPECT_EQ(WideToUTF16(L"xx\ufffd\ufffda\ufffdz"),
render_text->GetDisplayText());
// The private use characters from Area-B must be replaced. The rewrite step
// replaced 2 characters by 1 character.
render_text->SetText(WideToUTF16(L"x\U00100000\U00100001\U00100002"));
EXPECT_EQ(WideToUTF16(L"x\ufffd\ufffd\ufffd"), render_text->GetDisplayText());
}
TEST_F(RenderTextTest, InvalidSurrogateCharacterReplacement) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment