Optimize reading innerHTML for no-replacement cases.

In the case that EntityMask is 0, we don't even need to iterate the characters and can just call Append(const StringView&), which will share the underlying StringImpl if the builder is empty. This optimizes the case where someone does something like <script id=json type=json>{huge blob of text}</script> and then JSON.parse(document.getElementById('json').innerHTML), which would previously iterate the string doing nothing and then make a copy. While technically using .textContent would be the fast path, devs don't often seem to do that. For example, see this Twitter thread: https://twitter.com/ElliottZ/status/1171817105336832000?s=20 and many other places, such as SO posts: https://stackoverflow.com/questions/7581133/how-can-i-read-a-json-in-the-script-tag-from-javascript/7956249#7956249 as well as Airbnb's codebase (which I fixed to use .textContent). I went further and also optimized the case where we have a non-empty EntityMask but don't replace anything. For example, if you have a large paragraph of text and do getElementById('p').innerHTML, in many cases there's nothing to replace. We can't avoid the O(n) scan of the string in that case, but we can at least avoid the copy. Change-Id: I4270b7d8f693fe8dcf8f95271f33202a209ea298 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2366612 Reviewed-by: Ian Kilpatrick <ikilpatrick@chromium.org> Reviewed-by: Yoshifumi Inoue <yosin@chromium.org> Commit-Queue: Elliott Sprehn <esprehn@chromium.org> Cr-Commit-Position: refs/heads/master@{#800450}

Optimize reading innerHTML for no-replacement cases.
In the case that EntityMask is 0, we don't even need to iterate the characters and can just call Append(const StringView&), which will share the underlying StringImpl if the builder is empty. This optimizes the case where someone does something like <script id=json type=json>{huge blob of text}</script> and then JSON.parse(document.getElementById('json').innerHTML), which would previously iterate the string doing nothing and then make a copy. While technically using .textContent would be the fast path, devs don't often seem to do that. For example, see this Twitter thread: https://twitter.com/ElliottZ/status/1171817105336832000?s=20 and many other places, such as SO posts: https://stackoverflow.com/questions/7581133/how-can-i-read-a-json-in-the-script-tag-from-javascript/7956249#7956249 as well as Airbnb's codebase (which I fixed to use .textContent). I went further and also optimized the case where we have a non-empty EntityMask but don't replace anything. For example, if you have a large paragraph of text and do getElementById('p').innerHTML, in many cases there's nothing to replace. We can't avoid the O(n) scan of the string in that case, but we can at least avoid the copy. Change-Id: I4270b7d8f693fe8dcf8f95271f33202a209ea298 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2366612 Reviewed-by: Ian Kilpatrick <ikilpatrick@chromium.org> Reviewed-by: Yoshifumi Inoue <yosin@chromium.org> Commit-Queue: Elliott Sprehn <esprehn@chromium.org> Cr-Commit-Position: refs/heads/master@{#800450}
41f682d5 · Elliott Sprehn · Commit Bot · 2a4fa153 · 41f682d5 · 41f682d5
Commit 41f682d5 authored Aug 21, 2020 by Elliott Sprehn Committed by Commit Bot Aug 21, 2020
3 changed files
--- a/third_party/blink/renderer/core/editing/serializers/markup_formatter.cc
+++ b/third_party/blink/renderer/core/editing/serializers/markup_formatter.cc
@@ -57,35 +57,45 @@ struct EntityDescription {
 template <typename CharType>
 static inline void AppendCharactersReplacingEntitiesInternal(
    StringBuilder& result,
+    const StringView& source,
    CharType* text,
    unsigned length,
    const EntityDescription entity_maps[],
    unsigned entity_maps_count,
    EntityMask entity_mask) {
  unsigned position_after_last_entity = 0;
-  for (unsigned i = 0; i < length; ++i) {
-    for (unsigned entity_index = 0; entity_index < entity_maps_count;
-         ++entity_index) {
-      if (text[i] == entity_maps[entity_index].entity &&
-          entity_maps[entity_index].mask & entity_mask) {
-        result.Append(text + position_after_last_entity,
-                      i - position_after_last_entity);
-        const std::string& replacement = entity_maps[entity_index].reference;
-        result.Append(replacement.c_str(), replacement.length());
-        position_after_last_entity = i + 1;
-        break;
+  // Avoid scanning the string in cases where the mask is empty, for example
+  // scriptTag.innerHTML, which uses the kEntityMaskInCDATA mask.
+  if (entity_mask) {
+    for (unsigned i = 0; i < length; ++i) {
+      for (unsigned entity_index = 0; entity_index < entity_maps_count;
+           ++entity_index) {
+        if (text[i] == entity_maps[entity_index].entity &&
+            entity_maps[entity_index].mask & entity_mask) {
+          result.Append(text + position_after_last_entity,
+                        i - position_after_last_entity);
+          const std::string& replacement = entity_maps[entity_index].reference;
+          result.Append(replacement.c_str(), replacement.length());
+          position_after_last_entity = i + 1;
+          break;
+        }
      }
    }
  }
+  // If we didn't find anything to replace use the fast path on StringBuilder
+  // to avoid a copy. This optimizes cases like scriptTag.innerHTML or
+  // p.innerHTML when the <p> contains a single Text.
+  if (!position_after_last_entity) {
+    result.Append(source);
+    return;
+  }
  result.Append(text + position_after_last_entity,
                length - position_after_last_entity);
 }

 void MarkupFormatter::AppendCharactersReplacingEntities(
    StringBuilder& result,
-    const String& source,
-    unsigned offset,
-    unsigned length,
+    const StringView& source,
    EntityMask entity_mask) {
  DEFINE_STATIC_LOCAL(const std::string, amp_reference, ("&amp;"));
  DEFINE_STATIC_LOCAL(const std::string, lt_reference, ("&lt;"));
@@ -107,14 +117,10 @@ void MarkupFormatter::AppendCharactersReplacingEntities(
      {'\r', carriage_return_reference, kEntityCarriageReturn},
  };

-  if (!(offset + length))
-    return;
-
-  DCHECK_LE(offset + length, source.length());
  WTF::VisitCharacters(source, [&](const auto* chars, unsigned) {
    AppendCharactersReplacingEntitiesInternal(
-        result, chars + offset, length, kEntityMaps, base::size(kEntityMaps),
-        entity_mask);
+        result, source, chars, source.length(), kEntityMaps,
+        base::size(kEntityMaps), entity_mask);
  });
 }

@@ -203,7 +209,7 @@ void MarkupFormatter::AppendEndMarkup(StringBuilder& result,
 void MarkupFormatter::AppendAttributeValue(StringBuilder& result,
                                           const String& attribute,
                                           bool document_is_html) {
-  AppendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
+  AppendCharactersReplacingEntities(result, attribute,
                                    document_is_html
                                        ? kEntityMaskInHTMLAttributeValue
                                        : kEntityMaskInAttributeValue);
@@ -226,8 +232,7 @@ void MarkupFormatter::AppendAttribute(StringBuilder& result,
 }

 void MarkupFormatter::AppendText(StringBuilder& result, const Text& text) {
-  const String& str = text.data();
-  AppendCharactersReplacingEntities(result, str, 0, str.length(),
+  AppendCharactersReplacingEntities(result, text.data(),
                                    EntityMaskForText(text));
 }


--- a/third_party/blink/renderer/core/editing/serializers/markup_formatter.h
+++ b/third_party/blink/renderer/core/editing/serializers/markup_formatter.h
@@ -82,11 +82,9 @@ class MarkupFormatter final {
                              const String& value,
                              bool document_is_html);
  static void AppendCDATASection(StringBuilder&, const String&);
-  static void AppendCharactersReplacingEntities(StringBuilder&,
-                                                const String&,
-                                                unsigned,
-                                                unsigned,
-                                                EntityMask);
+  static void AppendCharactersReplacingEntities(StringBuilder& result,
+                                                const StringView& source,
+                                                EntityMask entity_mask);
  static void AppendComment(StringBuilder&, const String&);
  static void AppendDocumentType(StringBuilder&, const DocumentType&);
  static void AppendProcessingInstruction(StringBuilder&,

--- a/third_party/blink/renderer/core/editing/serializers/styled_markup_accumulator.cc
+++ b/third_party/blink/renderer/core/editing/serializers/styled_markup_accumulator.cc
@@ -91,7 +91,8 @@ void StyledMarkupAccumulator::AppendText(Text& text) {
    }
  }
  MarkupFormatter::AppendCharactersReplacingEntities(
-      result_, str, start, length, formatter_.EntityMaskForText(text));
+      result_, StringView(str, start, length),
+      formatter_.EntityMaskForText(text));
 }

 void StyledMarkupAccumulator::AppendTextWithInlineStyle(
@@ -119,8 +120,8 @@ void StyledMarkupAccumulator::AppendTextWithInlineStyle(
    String content =
        use_rendered_text ? RenderedText(text) : StringValueForRange(text);
    StringBuilder buffer;
-    MarkupFormatter::AppendCharactersReplacingEntities(
-        buffer, content, 0, content.length(), kEntityMaskInPCDATA);
+    MarkupFormatter::AppendCharactersReplacingEntities(buffer, content,
+                                                       kEntityMaskInPCDATA);
    // Keep collapsible white spaces as is during markup sanitization.
    const String text_to_append =
        IsForMarkupSanitization()