Commit 41f682d5 authored by Elliott Sprehn's avatar Elliott Sprehn Committed by Commit Bot

Optimize reading innerHTML for no-replacement cases.

In the case that EntityMask is 0 we don't even need to iterate the
characters and can just call Append(const StringView&) which will share
the underlying StringImpl if the builder is empty. This optimizes the
case where someone does something like:

<script id=json type=json>{huge blob of text}</script>

and then

JSON.parse(document.getElementById('json').innerHTML)

which would previously iterate the string doing nothing and then make a
copy.

While technically using .textContent would be the fast path devs
don't seem to often do that. For example see this twitter thread:
https://twitter.com/ElliottZ/status/1171817105336832000?s=20

and also many other places like SO posts:
https://stackoverflow.com/questions/7581133/how-can-i-read-a-json-in-the-script-tag-from-javascript/7956249#7956249

as well as inside Airbnb's codebase (which I fixed to .textContent).

I went further and optimized the case where we have a non-empty
EntityMask, but don't replace anything. For example if you have a large
paragraph of text and do getElementById('p').innerHTML in many cases
there's nothing to replace. We can't avoid the O(n) scan of the string
in that case, but we can at least avoid the copy.

Change-Id: I4270b7d8f693fe8dcf8f95271f33202a209ea298
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2366612Reviewed-by: default avatarIan Kilpatrick <ikilpatrick@chromium.org>
Reviewed-by: default avatarYoshifumi Inoue <yosin@chromium.org>
Commit-Queue: Elliott Sprehn <esprehn@chromium.org>
Cr-Commit-Position: refs/heads/master@{#800450}
parent 2a4fa153
......@@ -57,35 +57,45 @@ struct EntityDescription {
template <typename CharType>
static inline void AppendCharactersReplacingEntitiesInternal(
StringBuilder& result,
const StringView& source,
CharType* text,
unsigned length,
const EntityDescription entity_maps[],
unsigned entity_maps_count,
EntityMask entity_mask) {
unsigned position_after_last_entity = 0;
for (unsigned i = 0; i < length; ++i) {
for (unsigned entity_index = 0; entity_index < entity_maps_count;
++entity_index) {
if (text[i] == entity_maps[entity_index].entity &&
entity_maps[entity_index].mask & entity_mask) {
result.Append(text + position_after_last_entity,
i - position_after_last_entity);
const std::string& replacement = entity_maps[entity_index].reference;
result.Append(replacement.c_str(), replacement.length());
position_after_last_entity = i + 1;
break;
// Avoid scanning the string in cases where the mask is empty, for example
// scripTag.innerHTML that use the kEntityMaskInCDATA mask.
if (entity_mask) {
for (unsigned i = 0; i < length; ++i) {
for (unsigned entity_index = 0; entity_index < entity_maps_count;
++entity_index) {
if (text[i] == entity_maps[entity_index].entity &&
entity_maps[entity_index].mask & entity_mask) {
result.Append(text + position_after_last_entity,
i - position_after_last_entity);
const std::string& replacement = entity_maps[entity_index].reference;
result.Append(replacement.c_str(), replacement.length());
position_after_last_entity = i + 1;
break;
}
}
}
}
// If we didn't find anything to replace use the fast path on StringBuilder
// to avoid a copy. This optimizes cases like scriptTag.innerHTML or
// p.innerHTML when the <p> contains a single Text.
if (!position_after_last_entity) {
result.Append(source);
return;
}
result.Append(text + position_after_last_entity,
length - position_after_last_entity);
}
void MarkupFormatter::AppendCharactersReplacingEntities(
StringBuilder& result,
const String& source,
unsigned offset,
unsigned length,
const StringView& source,
EntityMask entity_mask) {
DEFINE_STATIC_LOCAL(const std::string, amp_reference, ("&amp;"));
DEFINE_STATIC_LOCAL(const std::string, lt_reference, ("&lt;"));
......@@ -107,14 +117,10 @@ void MarkupFormatter::AppendCharactersReplacingEntities(
{'\r', carriage_return_reference, kEntityCarriageReturn},
};
if (!(offset + length))
return;
DCHECK_LE(offset + length, source.length());
WTF::VisitCharacters(source, [&](const auto* chars, unsigned) {
AppendCharactersReplacingEntitiesInternal(
result, chars + offset, length, kEntityMaps, base::size(kEntityMaps),
entity_mask);
result, source, chars, source.length(), kEntityMaps,
base::size(kEntityMaps), entity_mask);
});
}
......@@ -203,7 +209,7 @@ void MarkupFormatter::AppendEndMarkup(StringBuilder& result,
void MarkupFormatter::AppendAttributeValue(StringBuilder& result,
const String& attribute,
bool document_is_html) {
AppendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
AppendCharactersReplacingEntities(result, attribute,
document_is_html
? kEntityMaskInHTMLAttributeValue
: kEntityMaskInAttributeValue);
......@@ -226,8 +232,7 @@ void MarkupFormatter::AppendAttribute(StringBuilder& result,
}
void MarkupFormatter::AppendText(StringBuilder& result, const Text& text) {
const String& str = text.data();
AppendCharactersReplacingEntities(result, str, 0, str.length(),
AppendCharactersReplacingEntities(result, text.data(),
EntityMaskForText(text));
}
......
......@@ -82,11 +82,9 @@ class MarkupFormatter final {
const String& value,
bool document_is_html);
static void AppendCDATASection(StringBuilder&, const String&);
static void AppendCharactersReplacingEntities(StringBuilder&,
const String&,
unsigned,
unsigned,
EntityMask);
static void AppendCharactersReplacingEntities(StringBuilder& result,
const StringView& source,
EntityMask entity_mask);
static void AppendComment(StringBuilder&, const String&);
static void AppendDocumentType(StringBuilder&, const DocumentType&);
static void AppendProcessingInstruction(StringBuilder&,
......
......@@ -91,7 +91,8 @@ void StyledMarkupAccumulator::AppendText(Text& text) {
}
}
MarkupFormatter::AppendCharactersReplacingEntities(
result_, str, start, length, formatter_.EntityMaskForText(text));
result_, StringView(str, start, length),
formatter_.EntityMaskForText(text));
}
void StyledMarkupAccumulator::AppendTextWithInlineStyle(
......@@ -119,8 +120,8 @@ void StyledMarkupAccumulator::AppendTextWithInlineStyle(
String content =
use_rendered_text ? RenderedText(text) : StringValueForRange(text);
StringBuilder buffer;
MarkupFormatter::AppendCharactersReplacingEntities(
buffer, content, 0, content.length(), kEntityMaskInPCDATA);
MarkupFormatter::AppendCharactersReplacingEntities(buffer, content,
kEntityMaskInPCDATA);
// Keep collapsible white spaces as is during markup sanitization.
const String text_to_append =
IsForMarkupSanitization()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment