Commit b6cef11d authored by Etienne Bergeron, committed by Commit Bot

Refactor the layout text rewriting rules

This CL refactors the way codepoints are rewritten
for the conversion from text to layout_text.

There is an invariant that must hold:
  * The number of codepoints stays the same.
The text size may differ and text indexes may not be the same.

This CL proposes using a map function to replace
codepoints. This avoids error-prone custom codepoint
replacement.

Bug: 1022893
Change-Id: Ic1fcdfe6b34a1bf1dd47f0ee6a5e157bd131d0ed
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1908447
Commit-Queue: Etienne Bergeron <etienneb@chromium.org>
Reviewed-by: Alexei Svitkine <asvitkine@chromium.org>
Reviewed-by: Robert Liao <robliao@chromium.org>
Cr-Commit-Position: refs/heads/master@{#715157}
parent d900270f
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <algorithm> #include <algorithm>
#include <climits> #include <climits>
#include "base/bind.h"
#include "base/callback.h"
#include "base/command_line.h" #include "base/command_line.h"
#include "base/feature_list.h" #include "base/feature_list.h"
#include "base/i18n/break_iterator.h" #include "base/i18n/break_iterator.h"
...@@ -226,36 +228,42 @@ size_t GetTextIndexForOtherText(const base::string16& text, ...@@ -226,36 +228,42 @@ size_t GetTextIndexForOtherText(const base::string16& text,
return other_text.length(); return other_text.length();
} }
// Returns the codepoint at text[index]. This function handles that codepoint // Returns the offset (codepoint rank) for the codepoint at text[index].
// can be one or two characters. It also handles offset in a middle of a size_t GetOffsetForTextIndex(const base::string16& text, size_t index) {
// surrogate pair. DCHECK_LT(index, text.length());
UChar32 GetCodepointAtIndex(const base::string16& text, size_t index) {
// Move index to the beginning of the surrogate pair, if needed. // Move index to the beginning of the surrogate pair, if needed.
U16_SET_CP_START(text.data(), 0, index); U16_SET_CP_START(text.data(), 0, index);
// Retrieve the codepoint at index.
UChar32 codepoint; // Iterates through codepoints until we reach |index| in |text|.
U16_NEXT(text.data(), index, text.length(), codepoint); for (base::i18n::UTF16CharIterator text_iter(&text); !text_iter.end();
return codepoint; text_iter.Advance()) {
// Codepoint at |index| is found, returns the corresponding offset.
if (text_iter.array_pos() == static_cast<int32_t>(index))
return text_iter.char_offset();
}
NOTREACHED();
return text.length();
} }
// Replace a the codepoint at text[index] by the codepoint specified in // Applies a conversion function on codepoints in |text|. The resulting text
// |new_codepoint|. This function handles that codepoint can be one or two // size may differ but the amount of codepoints stay the same. The rewrite
// characters and enforce to replace a codepoint by a single codepoint. // function |func| receives the offset (e.g. rank) of the codepoint and the
void ReplaceCodepointAtIndex(size_t index, // codepoint.
UChar32 new_codepoint, void RewriteCodepointsInPlace(
base::RepeatingCallback<UChar32(size_t, UChar32)> func,
base::string16* text) { base::string16* text) {
// Move index to the beginning of the surrogate pair, if needed. size_t index = 0;
U16_SET_CP_START(text->data(), 0, index); size_t rank = 0;
while (index < text->length()) {
// Gets the range to be replaced. // Gets the range to be replaced.
size_t end = index;
UChar32 original_codepoint; UChar32 original_codepoint;
U16_NEXT(text->data(), end, text->length(), original_codepoint); U16_GET(text->c_str(), 0, index, text->length(), original_codepoint);
DCHECK_LT(index, end); // Find the codepoint replacement.
DCHECK_LT(index, text->length()); UChar32 new_codepoint = func.Run(rank, original_codepoint);
DCHECK_LE(end, text->length());
if (new_codepoint != original_codepoint) {
// Encode the codepoint in utf16 (e.g. base::char16). // Encode the codepoint in utf16 (e.g. base::char16).
base::char16 replace_chars[U16_MAX_LENGTH]; base::char16 replace_chars[U16_MAX_LENGTH];
size_t replace_length = U16_LENGTH(new_codepoint); size_t replace_length = U16_LENGTH(new_codepoint);
...@@ -269,90 +277,99 @@ void ReplaceCodepointAtIndex(size_t index, ...@@ -269,90 +277,99 @@ void ReplaceCodepointAtIndex(size_t index,
// Replace the codepoint range by the new codepoint characters. // Replace the codepoint range by the new codepoint characters.
text->replace(index, U16_LENGTH(original_codepoint), replace_chars, text->replace(index, U16_LENGTH(original_codepoint), replace_chars,
replace_length); replace_length);
}
// Create an obscured text for the given |text| where characters are replaced by
// an bullet. In multiline, the newline character is not replaced. If
// |reveal_index| is specify, the codepoint at |reveal_index| kept its original
// value.
base::string16 CreateObscuredText(const base::string16& text,
bool multiline,
int reveal_index) {
// Make an initial string with the same amount of characters.
size_t obscured_text_length =
static_cast<size_t>(UTF16IndexToOffset(text, 0, text.length()));
base::string16 output_text(obscured_text_length,
RenderText::kPasswordReplacementChar);
// In multiline, do not replace the newline characters since they are used to
// split lines.
if (multiline) {
for (size_t i = 0; i < text.length(); ++i) {
if (text[i] == '\n')
output_text[i] = '\n';
}
} }
// If needed, reveal the character at position |reveal_index|. // Move index of the next codepoint. This must be computed after any
if (reveal_index >= 0 && reveal_index < static_cast<int>(text.length())) { // rewriting steps above since codepoint size may differ.
UChar32 original_codepoint = GetCodepointAtIndex(text, reveal_index); U16_NEXT(text->c_str(), index, text->length(), new_codepoint);
size_t output_index = ++rank;
GetTextIndexForOtherText(text, reveal_index, output_text); }
ReplaceCodepointAtIndex(output_index, original_codepoint, &output_text); }
// Obscures characters for the given |text|. The obscured characters are
// replaced by an bullet. In multiline, the newline character is not replaced.
// If |reveal_index| is specified, the codepoint at |reveal_index| keeps its
// original value.
void ObscuredText(bool multiline, int reveal_index, base::string16* text) {
DCHECK_LE(-1, reveal_index);
// Convert reveal_index to a rank because indexes are invalidated since the
// text is replace in-place. Reveal index can be -1 to indicate that no
// character should be revealed. If |reveal_index| is out-of-bound, no
// character should be revealed.
size_t reveal_rank;
if (reveal_index != -1 &&
base::checked_cast<size_t>(reveal_index) < text->size()) {
// Move |reveal_index| to the beginning of the surrogate pair, if needed.
U16_SET_CP_START(text->data(), 0, reveal_index);
reveal_rank = GetOffsetForTextIndex(*text, reveal_index);
} else {
reveal_rank = text->length();
} }
return output_text; RewriteCodepointsInPlace(
base::BindRepeating(
[](bool multiline, size_t reveal_rank, size_t rank,
UChar32 codepoint) -> UChar32 {
if ((reveal_rank == rank) || (codepoint == '\n' && multiline))
return codepoint;
return RenderText::kPasswordReplacementChar;
},
multiline, reveal_rank),
text);
} }
// Replace the codepoints not handled by RenderText by an other compatible // Replaces the unicode control characters, control characters and PUA (Private
// codepoint. Replace the unicode control characters ISO 6429 (block C0) by // Use Areas) codepoints.
// their corresponding visual symbols. Control characters can't be displayed but UChar32 ReplaceControlCharacter(bool multiline,
// their visual symbols can. Replace PUA (Private Use Areas) codepoints with the size_t index,
// 'replacement character'. UChar32 codepoint) {
void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) { // 'REPLACEMENT CHARACTER' used to replace an unknown,
// 'REPLACEMENT CHARACTER' used to replace an unknown, unrecognized or // unrecognized or unrepresentable character.
// unrepresentable character.
constexpr base::char16 kReplacementCodepoint = 0xFFFD; constexpr base::char16 kReplacementCodepoint = 0xFFFD;
// Control Pictures block (see: https://unicode.org/charts/PDF/U2400.pdf). // Control Pictures block (see:
// https://unicode.org/charts/PDF/U2400.pdf).
constexpr base::char16 kSymbolsCodepoint = 0x2400; constexpr base::char16 kSymbolsCodepoint = 0x2400;
size_t offset = 0;
while (offset < text->length()) {
UChar32 codepoint;
U16_GET(text->c_str(), 0, offset, text->length(), codepoint);
if (codepoint >= 0 && codepoint <= 0x1F) { if (codepoint >= 0 && codepoint <= 0x1F) {
// The newline character should be kept as-is when rendertext is // The newline character should be kept as-is when
// multiline. // rendertext is multiline.
if (codepoint != '\n' || !multiline) { if (codepoint != '\n' || !multiline) {
// Replace codepoints with their visual symbols, which are at the same // Replace codepoints with their visual symbols, which are
// offset from kSymbolsCodepoint. // at the same offset from kSymbolsCodepoint.
(*text)[offset] = kSymbolsCodepoint + codepoint; return kSymbolsCodepoint + codepoint;
} }
} else if (codepoint == 0x7F) { } else if (codepoint == 0x7F) {
// Replace the 'del' codepoint by its symbol (u2421). // Replace the 'del' codepoint by its symbol (u2421).
(*text)[offset] = kSymbolsCodepoint + 0x21; return kSymbolsCodepoint + 0x21;
} else if (!U_IS_UNICODE_CHAR(codepoint)) { } else if (!U_IS_UNICODE_CHAR(codepoint)) {
// Unicode codepoint that can't be assigned a character. This handles: // Unicode codepoint that can't be assigned a character.
// This handles:
// - single surrogate codepoints, // - single surrogate codepoints,
// - last two codepoints on each plane, // - last two codepoints on each plane,
// - invalid characters (e.g. u+fdd0..u+fdef) // - invalid characters (e.g. u+fdd0..u+fdef)
// - codepoints above u+10ffff // - codepoints above u+10ffff
ReplaceCodepointAtIndex(offset, kReplacementCodepoint, text); return kReplacementCodepoint;
} else if (codepoint > 0x7F) { } else if (codepoint > 0x7F) {
// Private use codepoints are working with a pair of font and codepoint, // Private use codepoints are working with a pair of font
// but they are not used in Chrome. // and codepoint, but they are not used in Chrome.
const int8_t codepoint_category = u_charType(codepoint); const int8_t codepoint_category = u_charType(codepoint);
if (codepoint_category == U_PRIVATE_USE_CHAR || if (codepoint_category == U_PRIVATE_USE_CHAR ||
codepoint_category == U_CONTROL_CHAR) { codepoint_category == U_CONTROL_CHAR) {
ReplaceCodepointAtIndex(offset, kReplacementCodepoint, text); return kReplacementCodepoint;
} }
} }
// Move offset to the index of the next codepoint. This must be computed return codepoint;
// after any rewriting steps above since codepoint size may differ. }
U16_NEXT(text->c_str(), offset, text->length(), codepoint);
} // Replace the codepoints not handled by RenderText by an other compatible
// codepoint. Replace the unicode control characters ISO 6429 (block C0) by
// their corresponding visual symbols. Control characters can't be displayed but
// their visual symbols can. Replace PUA (Private Use Areas) codepoints with the
// 'replacement character'.
void ReplaceControlCharactersWithSymbols(bool multiline, base::string16* text) {
RewriteCodepointsInPlace(
base::BindRepeating(ReplaceControlCharacter, multiline), text);
} }
} // namespace } // namespace
...@@ -1785,12 +1802,15 @@ void RenderText::OnTextAttributeChanged() { ...@@ -1785,12 +1802,15 @@ void RenderText::OnTextAttributeChanged() {
text_elided_ = false; text_elided_ = false;
line_breaks_.SetMax(0); line_breaks_.SetMax(0);
if (obscured_) {
layout_text_ =
CreateObscuredText(text_, multiline_, obscured_reveal_index_);
} else {
layout_text_ = text_; layout_text_ = text_;
}
// Obscure the layout text by replacing hidden characters by bullets.
if (obscured_)
ObscuredText(multiline_, obscured_reveal_index_, &layout_text_);
// Handle unicode control characters ISO 6429 (block C0). Range from 0 to 0x1F
// and 0x7F.
ReplaceControlCharactersWithSymbols(multiline_, &layout_text_);
const base::string16& text = layout_text_; const base::string16& text = layout_text_;
if (truncate_length_ > 0 && truncate_length_ < text.length()) { if (truncate_length_ > 0 && truncate_length_ < text.length()) {
...@@ -1816,10 +1836,6 @@ void RenderText::OnTextAttributeChanged() { ...@@ -1816,10 +1836,6 @@ void RenderText::OnTextAttributeChanged() {
} }
} }
// Handle unicode control characters ISO 6429 (block C0). Range from 0 to 0x1F
// and 0x7F.
ReplaceControlCharactersWithSymbols(multiline_, &layout_text_);
OnLayoutTextAttributeChanged(true); OnLayoutTextAttributeChanged(true);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment