Introduce Hinted APIs to AttributeCollection.

In preparation for changing the DOM attribute APIs on Element to avoid unnecessary creation of AtomicStrings for the purpose of doing an existience check, this adds a set of "Hinted" APIs AttributeCollection that accept a hint parameter representing a weak lookup from the AtomicStringTable. WeakFind() APIs are added to AtomicStringTable to facilitate this. Knowing that no AtomicString exists for a given string contents can be used to skip a data structure search thereby avoiding all of the AtomicStringTable traversal, a potential malloc/free for a new AtomicString, and a AttributeCollection traversal. This is broken out of https://chromium-review.googlesource.com/c/chromium/src/+/2204539 which in turn is broken out of https://chromium-review.googlesource.com/c/chromium/src/+/1557854 Bug: 1083392 Change-Id: Ib996e9075704c8b68ff3cb55369adaf3bd129059 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2263472 Commit-Queue: Albert J. Wong <ajwong@chromium.org> Auto-Submit: Albert J. Wong <ajwong@chromium.org> Reviewed-by: Jeremy Roman <jbroman@chromium.org> Cr-Commit-Position: refs/heads/master@{#782776}

Introduce Hinted APIs to AttributeCollection.
In preparation for changing the DOM attribute APIs on Element to avoid unnecessary creation of AtomicStrings for the purpose of doing an existience check, this adds a set of "Hinted" APIs AttributeCollection that accept a hint parameter representing a weak lookup from the AtomicStringTable. WeakFind() APIs are added to AtomicStringTable to facilitate this. Knowing that no AtomicString exists for a given string contents can be used to skip a data structure search thereby avoiding all of the AtomicStringTable traversal, a potential malloc/free for a new AtomicString, and a AttributeCollection traversal. This is broken out of https://chromium-review.googlesource.com/c/chromium/src/+/2204539 which in turn is broken out of https://chromium-review.googlesource.com/c/chromium/src/+/1557854 Bug: 1083392 Change-Id: Ib996e9075704c8b68ff3cb55369adaf3bd129059 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2263472 Commit-Queue: Albert J. Wong <ajwong@chromium.org> Auto-Submit: Albert J. Wong <ajwong@chromium.org> Reviewed-by: Jeremy Roman <jbroman@chromium.org> Cr-Commit-Position: refs/heads/master@{#782776}
08a0dbac · Albert J. Wong · Commit Bot · ce6e5f49 · 08a0dbac · 08a0dbac
Commit 08a0dbac authored Jun 26, 2020 by Albert J. Wong Committed by Commit Bot Jun 26, 2020
4 changed files
--- a/third_party/blink/renderer/core/dom/attribute_collection.h
+++ b/third_party/blink/renderer/core/dom/attribute_collection.h
@@ -35,6 +35,7 @@
 #include "third_party/blink/renderer/core/dom/attribute.h"
 #include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
+#include "third_party/blink/renderer/platform/wtf/text/atomic_string_table.h"
 #include "third_party/blink/renderer/platform/wtf/vector.h"
 namespace blink {
@@ -67,8 +68,56 @@ class AttributeCollectionGeneric {
  wtf_size_t FindIndex(const QualifiedName&) const;
  wtf_size_t FindIndex(const AtomicString& name) const;
+  // FindHinted() and FindIndexHinted() have subtle semantics.
+  //
+  // The |hint| is a WeakResult that represents whether or not an AtomicString
+  // exists for the AttributeCollectionGeneric version of |name| which has two
+  // odd quirks:
+  //
+  //  1) In an HTML context, the hint will be from a lookup of the ASCII
+  //     lowercased version of the attribute |name| as is required by spec.
+  //  2) The |hint| is a snapshot of a membership query of the
+  //     AtomicStringTable from a specific point in time.
+  //
+  // For (1), the HTML spec says that attribute names without prefixes should
+  // be lowercased before comparison. However, if an attribute is added with
+  // a namespace using the *NS() attribute APIs then lookup becomes case
+  // sensitive. Therefore the API require both non-lowercased |name| and a
+  // lowercased |hint|.
+  //
+  // For (2), the caller must ensure that its logic is robust to changes in
+  // the AtomicStringTable between the creation of the |hint| and its use with
+  // this API. In particular, one should not modify |collection| between
+  // creation of |hint| and execution of any hinted function.
+  //
+  // A concrete example of a valid usage pattern is:
+  //
+  // WTF::AtomicStringTable::WeakResult hint =
+  //     WTF::AtomicStringTable::WeakFindLowercased(name);
+  //   .... Mutate |WTF::AtomicStringTable| but not |collection| ....
+  // collection.FindHinted(name, hint);
+  //
+  // Because FindHinted() is an existence check, as long as collection is not
+  // mutated between the hint creation and the lookup, we know that
+  //
+  //  (a) If hint.IsNull(), it cannot ever be in |collection| since
+  //      then the corresponding AtomicString would be found in
+  //      the AtomicStringTable.
+  //  (b) If !hint.IsNull() and hint is in |collection| then the table
+  //      has a reference to the corresponding AtomicString meaning
+  //      it will not be removed from the AtomicString.
+  //  (c) If the !hint.IsNull() and it is not in |collection|, then it is
+  //      possible that the underlying memory buffer for the AtomicString
+  //      corresponding to the him can  be reallocated to a different string
+  //      making the |hint| semantically invalid. However, because the
+  //      |collection| is not mutated, |hint| will not match anything.
+  iterator FindHinted(const StringView& name,
+                      WTF::AtomicStringTable::WeakResult hint) const;
+  wtf_size_t FindIndexHinted(const StringView& name,
+                             WTF::AtomicStringTable::WeakResult hint) const;
 protected:
-  wtf_size_t FindSlowCase(const AtomicString& name) const;
+  wtf_size_t FindWithPrefix(const StringView& name) const;
  ContainerMemberType attributes_;
 };
@@ -133,6 +182,16 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::Find(
  return index != kNotFound ? &at(index) : nullptr;
 }
+template <typename Container, typename ContainerMemberType>
+inline typename AttributeCollectionGeneric<Container,
+                                           ContainerMemberType>::iterator
+AttributeCollectionGeneric<Container, ContainerMemberType>::FindHinted(
+    const StringView& name,
+    WTF::AtomicStringTable::WeakResult hint) const {
+  wtf_size_t index = FindIndexHinted(name, hint);
+  return index != kNotFound ? &at(index) : nullptr;
+}
 template <typename Container, typename ContainerMemberType>
 inline wtf_size_t
 AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex(
@@ -150,7 +209,17 @@ template <typename Container, typename ContainerMemberType>
 inline wtf_size_t
 AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex(
    const AtomicString& name) const {
-  bool do_slow_check = false;
+  return FindIndexHinted(name, WTF::AtomicStringTable::WeakResult(name.Impl()));
+}
+template <typename Container, typename ContainerMemberType>
+inline wtf_size_t
+AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndexHinted(
+    const StringView& name,
+    WTF::AtomicStringTable::WeakResult hint) const {
+  // A slow check is required if there are any attributes with prefixes
+  // and no unprefixed name matches.
+  bool has_attributes_with_prefixes = false;
  // Optimize for the case where the attribute exists and its name exactly
  // matches.
@@ -160,15 +229,17 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex(
    // FIXME: Why check the prefix? Namespaces should be all that matter.
    // Most attributes (all of HTML and CSS) have no namespace.
    if (!it->GetName().HasPrefix()) {
-      if (name == it->LocalName())
+      if (hint == it->LocalName())
        return index;
    } else {
-      do_slow_check = true;
+      has_attributes_with_prefixes = true;
    }
  }
-  if (do_slow_check)
+  // Note that if the attribute has a prefix, the match has to be case
-    return FindSlowCase(name);
+  // sensitive therefore |name| must be used.
+  if (has_attributes_with_prefixes)
+    return FindWithPrefix(name);
  return kNotFound;
 }
@@ -187,17 +258,18 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::Find(
 template <typename Container, typename ContainerMemberType>
 wtf_size_t
-AttributeCollectionGeneric<Container, ContainerMemberType>::FindSlowCase(
+AttributeCollectionGeneric<Container, ContainerMemberType>::FindWithPrefix(
-    const AtomicString& name) const {
+    const StringView& name) const {
-  // Continue to checking case-insensitively and/or full namespaced names if
+  // Check all attributes with prefixes. This is a case sensitive check.
-  // necessary:
+  // Attributes with empty prefixes are expected to be handled outside this
+  // function.
  iterator end = this->end();
  wtf_size_t index = 0;
  for (iterator it = begin(); it != end; ++it, ++index) {
    if (!it->GetName().HasPrefix()) {
      // Skip attributes with no prefixes because they must be checked in
      // FindIndex(const AtomicString&).
-      DCHECK_NE(name, it->LocalName());
+      DCHECK(!(name == it->LocalName()));
    } else {
      // FIXME: Would be faster to do this comparison without calling ToString,
      // which generates a temporary string by concatenation. But this branch is

--- a/third_party/blink/renderer/platform/wtf/text/atomic_string_table.cc
+++ b/third_party/blink/renderer/platform/wtf/text/atomic_string_table.cc
@@ -10,35 +10,9 @@
 namespace WTF {
-AtomicStringTable::AtomicStringTable() {
+namespace {
-  for (StringImpl* string : StringImpl::AllStaticStrings().Values())
-    Add(string);
-}
-AtomicStringTable::~AtomicStringTable() {
-  for (StringImpl* string : table_) {
-    if (!string->IsStatic()) {
-      DCHECK(string->IsAtomic());
-      string->SetIsAtomic(false);
-    }
-  }
-}
-void AtomicStringTable::ReserveCapacity(unsigned size) {
-  table_.ReserveCapacityForSize(size);
-}
-template <typename T, typename HashTranslator>
-scoped_refptr<StringImpl> AtomicStringTable::AddToStringTable(const T& value) {
-  HashSet<StringImpl*>::AddResult add_result =
-      table_.AddWithTranslator<HashTranslator>(value);
-  // If the string is newly-translated, then we need to adopt it.
-  // The boolean in the pair tells us if that is so.
-  return add_result.is_new_entry ? base::AdoptRef(*add_result.stored_value)
-                                 : *add_result.stored_value;
-}
+// TODO(ajwong): consider replacing with a span in the future.
 template <typename CharacterType>
 struct HashTranslatorCharBuffer {
  const CharacterType* s;
@@ -148,6 +122,85 @@ struct HashAndUTF8CharactersTranslator {
  }
 };
+struct StringViewLookupTranslator {
+  static unsigned GetHash(const StringView& buf) {
+    StringImpl* shared_impl = buf.SharedImpl();
+    if (LIKELY(shared_impl))
+      return shared_impl->GetHash();
+    if (buf.Is8Bit()) {
+      return StringHasher::ComputeHashAndMaskTop8Bits(buf.Characters8(),
+                                                      buf.length());
+    } else {
+      return StringHasher::ComputeHashAndMaskTop8Bits(buf.Characters16(),
+                                                      buf.length());
+    }
+  }
+  static bool Equal(StringImpl* const& str, const StringView& buf) {
+    return *str == buf;
+  }
+};
+struct LowercaseStringViewLookupTranslator {
+  template <typename CharType>
+  static UChar ToASCIILowerUChar(CharType ch) {
+    return ToASCIILower(ch);
+  }
+  static unsigned GetHash(const StringView& buf) {
+    // If possible, use cached hash if the string is lowercased.
+    StringImpl* shared_impl = buf.SharedImpl();
+    if (LIKELY(shared_impl && buf.IsLowerASCII()))
+      return shared_impl->GetHash();
+    if (buf.Is8Bit()) {
+      return StringHasher::ComputeHashAndMaskTop8Bits<LChar,
+                                                      ToASCIILowerUChar<LChar>>(
+          buf.Characters8(), buf.length());
+    } else {
+      return StringHasher::ComputeHashAndMaskTop8Bits<UChar,
+                                                      ToASCIILowerUChar<UChar>>(
+          buf.Characters16(), buf.length());
+    }
+  }
+  static bool Equal(StringImpl* const& str, const StringView& buf) {
+    return EqualIgnoringASCIICase(StringView(str), buf);
+  }
+};
+}  // namespace
+AtomicStringTable::AtomicStringTable() {
+  for (StringImpl* string : StringImpl::AllStaticStrings().Values())
+    Add(string);
+}
+AtomicStringTable::~AtomicStringTable() {
+  for (StringImpl* string : table_) {
+    if (!string->IsStatic()) {
+      DCHECK(string->IsAtomic());
+      string->SetIsAtomic(false);
+    }
+  }
+}
+void AtomicStringTable::ReserveCapacity(unsigned size) {
+  table_.ReserveCapacityForSize(size);
+}
+template <typename T, typename HashTranslator>
+scoped_refptr<StringImpl> AtomicStringTable::AddToStringTable(const T& value) {
+  HashSet<StringImpl*>::AddResult add_result =
+      table_.AddWithTranslator<HashTranslator>(value);
+  // If the string is newly-translated, then we need to adopt it.
+  // The boolean in the pair tells us if that is so.
+  return add_result.is_new_entry ? base::AdoptRef(*add_result.stored_value)
+                                 : *add_result.stored_value;
+}
 scoped_refptr<StringImpl> AtomicStringTable::Add(const UChar* s,
                                                 unsigned length) {
  if (!s)
@@ -221,6 +274,67 @@ scoped_refptr<StringImpl> AtomicStringTable::AddUTF8(
                          HashAndUTF8CharactersTranslator>(buffer);
 }
+AtomicStringTable::WeakResult AtomicStringTable::WeakFindSlow(
+    StringImpl* string) {
+  DCHECK(string->length());
+  const auto& it = table_.find(string);
+  if (it == table_.end())
+    return WeakResult();
+  return WeakResult(*it);
+}
+AtomicStringTable::WeakResult AtomicStringTable::WeakFindSlow(
+    const StringView& string) {
+  DCHECK(string.length());
+  const auto& it = table_.Find<StringViewLookupTranslator>(string);
+  if (it == table_.end())
+    return WeakResult();
+  return WeakResult(*it);
+}
+AtomicStringTable::WeakResult AtomicStringTable::WeakFindLowercasedSlow(
+    const StringView& string) {
+  DCHECK(string.length());
+  const auto& it = table_.Find<LowercaseStringViewLookupTranslator>(string);
+  if (it == table_.end())
+    return WeakResult();
+  return WeakResult(*it);
+}
+AtomicStringTable::WeakResult AtomicStringTable::WeakFind(const LChar* chars,
+                                                          unsigned length) {
+  if (!chars)
+    return WeakResult();
+  // Mirror the empty logic in Add().
+  if (!length)
+    return WeakResult(StringImpl::empty_);
+  LCharBuffer buffer = {chars, length};
+  const auto& it = table_.Find<LCharBufferTranslator>(buffer);
+  if (it == table_.end())
+    return WeakResult();
+  return WeakResult(*it);
+}
+AtomicStringTable::WeakResult AtomicStringTable::WeakFind(const UChar* chars,
+                                                          unsigned length) {
+  if (!chars)
+    return WeakResult();
+  // Mirror the empty logic in Add().
+  if (!length)
+    return WeakResult(StringImpl::empty_);
+  UCharBuffer buffer = {chars, length};
+  const auto& it = table_.Find<UCharBufferTranslator>(buffer);
+  if (it == table_.end())
+    return WeakResult();
+  return WeakResult(*it);
+}
 void AtomicStringTable::Remove(StringImpl* string) {
  DCHECK(string->IsAtomic());
  auto iterator = table_.find(string);

--- a/third_party/blink/renderer/platform/wtf/text/atomic_string_table.h
+++ b/third_party/blink/renderer/platform/wtf/text/atomic_string_table.h
@@ -46,6 +46,78 @@ class WTF_EXPORT AtomicStringTable final {
  scoped_refptr<StringImpl> AddUTF8(const char* characters_start,
                                    const char* characters_end);
+  // Returned as part of the WeakFind() APIs below. Represents the result of
+  // the non-creating lookup within the AtomicStringTable. See the WeakFind()
+  // documentation for a description of how it can be used.
+  class WeakResult {
+   public:
+    WeakResult() = default;
+    explicit WeakResult(StringImpl* str)
+        : ptr_value_(reinterpret_cast<uintptr_t>(str)) {
+      CHECK(!str || str->IsAtomic() || str == StringImpl::empty_);
+    }
+    bool operator==(const AtomicString& s) const { return *this == s.Impl(); }
+    bool operator==(const String& s) const { return *this == s.Impl(); }
+    bool operator==(const StringImpl* str) const {
+      return reinterpret_cast<uintptr_t>(str) == ptr_value_;
+    }
+    bool IsNull() const { return ptr_value_ != 0; }
+   private:
+    // Contains the pointer a string in a non-deferenceable form. Do NOT cast
+    // back to a StringImpl and dereference. The object may no longer be alive.
+    uintptr_t ptr_value_ = 0;
+  };
+  // Checks for existence of a string in the AtomicStringTable without
+  // unnecessarily creating an AtomicString. Useful to optimize fast-path
+  // non-existence checks inside collections of AtomicStrings.
+  //
+  // Specifically, if WeakFind() returns an IsNull() WeakResult, then a
+  // collection search can be skipped because the AtomicString cannot exist
+  // in the collection. If WeakFind() returns a non-null WeakResult, then
+  // assuming the target collection has no concurrent access, this lookup
+  // can be reused to check for existence in the collection without
+  // requiring either an AtomicString collection or another lookup within
+  // the AtomicStringTable.
+  WeakResult WeakFind(StringImpl* string) {
+    // Mirror the empty logic in Add().
+    if (UNLIKELY(!string->length()))
+      return WeakResult(StringImpl::empty_);
+    if (LIKELY(string->IsAtomic()))
+      return WeakResult(string);
+    return WeakFindSlow(string);
+  }
+  WeakResult WeakFind(const StringView& string) {
+    // Mirror the empty logic in Add().
+    if (UNLIKELY(!string.length()))
+      return WeakResult(StringImpl::empty_);
+    if (LIKELY(string.IsAtomic()))
+      return WeakResult(string.SharedImpl());
+    return WeakFindSlow(string);
+  }
+  WeakResult WeakFind(const LChar* chars, unsigned length);
+  WeakResult WeakFind(const UChar* chars, unsigned length);
+  WeakResult WeakFindLowercased(const StringView& string) {
+    // Mirror the empty logic in Add().
+    if (UNLIKELY(!string.length()))
+      return WeakResult(StringImpl::empty_);
+    if (LIKELY(string.IsAtomic() && string.IsLowerASCII()))
+      return WeakResult(string.SharedImpl());
+    return WeakFindLowercasedSlow(string);
+  }
  // This is for ~StringImpl to unregister a string before destruction since
  // the table is holding weak pointers. It should not be used directly.
  void Remove(StringImpl*);
@@ -54,6 +126,10 @@ class WTF_EXPORT AtomicStringTable final {
  template <typename T, typename HashTranslator>
  inline scoped_refptr<StringImpl> AddToStringTable(const T& value);
+  WeakResult WeakFindSlow(StringImpl*);
+  WeakResult WeakFindSlow(const StringView&);
+  WeakResult WeakFindLowercasedSlow(const StringView& string);
  HashSet<StringImpl*> table_;
  DISALLOW_COPY_AND_ASSIGN(AtomicStringTable);

--- a/third_party/blink/renderer/platform/wtf/text/string_hasher.h
+++ b/third_party/blink/renderer/platform/wtf/text/string_hasher.h
@@ -196,8 +196,9 @@ class StringHasher {
  }
 private:
+  // The StringHasher works on UChar so all converters should normalize input
+  // data into being a UChar.
  static UChar DefaultConverter(UChar character) { return character; }
  static UChar DefaultConverter(LChar character) { return character; }
  unsigned AvalancheBits() const {