Commit 08a0dbac authored by Albert J. Wong's avatar Albert J. Wong Committed by Commit Bot

Introduce Hinted APIs to AttributeCollection.

In preparation for changing the DOM attribute APIs on Element to avoid
unnecessary creation of AtomicStrings for the purpose of doing an
existience check, this adds a set of "Hinted" APIs AttributeCollection
that accept a hint parameter representing a weak lookup from the
AtomicStringTable.  WeakFind() APIs are added to AtomicStringTable to
facilitate this.

Knowing that no AtomicString exists for a given string contents can be
used to skip a data structure search thereby avoiding all of the
AtomicStringTable traversal, a potential malloc/free for a new
AtomicString, and a AttributeCollection traversal.

This is broken out of
  https://chromium-review.googlesource.com/c/chromium/src/+/2204539

which in turn is broken out of
  https://chromium-review.googlesource.com/c/chromium/src/+/1557854

Bug: 1083392
Change-Id: Ib996e9075704c8b68ff3cb55369adaf3bd129059
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2263472
Commit-Queue: Albert J. Wong <ajwong@chromium.org>
Auto-Submit: Albert J. Wong <ajwong@chromium.org>
Reviewed-by: default avatarJeremy Roman <jbroman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#782776}
parent ce6e5f49
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "third_party/blink/renderer/core/dom/attribute.h" #include "third_party/blink/renderer/core/dom/attribute.h"
#include "third_party/blink/renderer/platform/wtf/allocator/allocator.h" #include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
#include "third_party/blink/renderer/platform/wtf/text/atomic_string_table.h"
#include "third_party/blink/renderer/platform/wtf/vector.h" #include "third_party/blink/renderer/platform/wtf/vector.h"
namespace blink { namespace blink {
...@@ -67,8 +68,56 @@ class AttributeCollectionGeneric { ...@@ -67,8 +68,56 @@ class AttributeCollectionGeneric {
wtf_size_t FindIndex(const QualifiedName&) const; wtf_size_t FindIndex(const QualifiedName&) const;
wtf_size_t FindIndex(const AtomicString& name) const; wtf_size_t FindIndex(const AtomicString& name) const;
// FindHinted() and FindIndexHinted() have subtle semantics.
//
// The |hint| is a WeakResult that represents whether or not an AtomicString
// exists for the AttributeCollectionGeneric version of |name| which has two
// odd quirks:
//
// 1) In an HTML context, the hint will be from a lookup of the ASCII
// lowercased version of the attribute |name| as is required by spec.
// 2) The |hint| is a snapshot of a membership query of the
// AtomicStringTable from a specific point in time.
//
// For (1), the HTML spec says that attribute names without prefixes should
// be lowercased before comparison. However, if an attribute is added with
// a namespace using the *NS() attribute APIs then lookup becomes case
// sensitive. Therefore the API require both non-lowercased |name| and a
// lowercased |hint|.
//
// For (2), the caller must ensure that its logic is robust to changes in
// the AtomicStringTable between the creation of the |hint| and its use with
// this API. In particular, one should not modify |collection| between
// creation of |hint| and execution of any hinted function.
//
// A concrete example of a valid usage pattern is:
//
// WTF::AtomicStringTable::WeakResult hint =
// WTF::AtomicStringTable::WeakFindLowercased(name);
// .... Mutate |WTF::AtomicStringTable| but not |collection| ....
// collection.FindHinted(name, hint);
//
// Because FindHinted() is an existence check, as long as collection is not
// mutated between the hint creation and the lookup, we know that
//
// (a) If hint.IsNull(), it cannot ever be in |collection| since
// then the corresponding AtomicString would be found in
// the AtomicStringTable.
// (b) If !hint.IsNull() and hint is in |collection| then the table
// has a reference to the corresponding AtomicString meaning
// it will not be removed from the AtomicString.
// (c) If the !hint.IsNull() and it is not in |collection|, then it is
// possible that the underlying memory buffer for the AtomicString
// corresponding to the him can be reallocated to a different string
// making the |hint| semantically invalid. However, because the
// |collection| is not mutated, |hint| will not match anything.
iterator FindHinted(const StringView& name,
WTF::AtomicStringTable::WeakResult hint) const;
wtf_size_t FindIndexHinted(const StringView& name,
WTF::AtomicStringTable::WeakResult hint) const;
protected: protected:
wtf_size_t FindSlowCase(const AtomicString& name) const; wtf_size_t FindWithPrefix(const StringView& name) const;
ContainerMemberType attributes_; ContainerMemberType attributes_;
}; };
...@@ -133,6 +182,16 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::Find( ...@@ -133,6 +182,16 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::Find(
return index != kNotFound ? &at(index) : nullptr; return index != kNotFound ? &at(index) : nullptr;
} }
template <typename Container, typename ContainerMemberType>
inline typename AttributeCollectionGeneric<Container,
ContainerMemberType>::iterator
AttributeCollectionGeneric<Container, ContainerMemberType>::FindHinted(
const StringView& name,
WTF::AtomicStringTable::WeakResult hint) const {
wtf_size_t index = FindIndexHinted(name, hint);
return index != kNotFound ? &at(index) : nullptr;
}
template <typename Container, typename ContainerMemberType> template <typename Container, typename ContainerMemberType>
inline wtf_size_t inline wtf_size_t
AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex( AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex(
...@@ -150,7 +209,17 @@ template <typename Container, typename ContainerMemberType> ...@@ -150,7 +209,17 @@ template <typename Container, typename ContainerMemberType>
inline wtf_size_t inline wtf_size_t
AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex( AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex(
const AtomicString& name) const { const AtomicString& name) const {
bool do_slow_check = false; return FindIndexHinted(name, WTF::AtomicStringTable::WeakResult(name.Impl()));
}
template <typename Container, typename ContainerMemberType>
inline wtf_size_t
AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndexHinted(
const StringView& name,
WTF::AtomicStringTable::WeakResult hint) const {
// A slow check is required if there are any attributes with prefixes
// and no unprefixed name matches.
bool has_attributes_with_prefixes = false;
// Optimize for the case where the attribute exists and its name exactly // Optimize for the case where the attribute exists and its name exactly
// matches. // matches.
...@@ -160,15 +229,17 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex( ...@@ -160,15 +229,17 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::FindIndex(
// FIXME: Why check the prefix? Namespaces should be all that matter. // FIXME: Why check the prefix? Namespaces should be all that matter.
// Most attributes (all of HTML and CSS) have no namespace. // Most attributes (all of HTML and CSS) have no namespace.
if (!it->GetName().HasPrefix()) { if (!it->GetName().HasPrefix()) {
if (name == it->LocalName()) if (hint == it->LocalName())
return index; return index;
} else { } else {
do_slow_check = true; has_attributes_with_prefixes = true;
} }
} }
if (do_slow_check) // Note that if the attribute has a prefix, the match has to be case
return FindSlowCase(name); // sensitive therefore |name| must be used.
if (has_attributes_with_prefixes)
return FindWithPrefix(name);
return kNotFound; return kNotFound;
} }
...@@ -187,17 +258,18 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::Find( ...@@ -187,17 +258,18 @@ AttributeCollectionGeneric<Container, ContainerMemberType>::Find(
template <typename Container, typename ContainerMemberType> template <typename Container, typename ContainerMemberType>
wtf_size_t wtf_size_t
AttributeCollectionGeneric<Container, ContainerMemberType>::FindSlowCase( AttributeCollectionGeneric<Container, ContainerMemberType>::FindWithPrefix(
const AtomicString& name) const { const StringView& name) const {
// Continue to checking case-insensitively and/or full namespaced names if // Check all attributes with prefixes. This is a case sensitive check.
// necessary: // Attributes with empty prefixes are expected to be handled outside this
// function.
iterator end = this->end(); iterator end = this->end();
wtf_size_t index = 0; wtf_size_t index = 0;
for (iterator it = begin(); it != end; ++it, ++index) { for (iterator it = begin(); it != end; ++it, ++index) {
if (!it->GetName().HasPrefix()) { if (!it->GetName().HasPrefix()) {
// Skip attributes with no prefixes because they must be checked in // Skip attributes with no prefixes because they must be checked in
// FindIndex(const AtomicString&). // FindIndex(const AtomicString&).
DCHECK_NE(name, it->LocalName()); DCHECK(!(name == it->LocalName()));
} else { } else {
// FIXME: Would be faster to do this comparison without calling ToString, // FIXME: Would be faster to do this comparison without calling ToString,
// which generates a temporary string by concatenation. But this branch is // which generates a temporary string by concatenation. But this branch is
......
...@@ -10,35 +10,9 @@ ...@@ -10,35 +10,9 @@
namespace WTF { namespace WTF {
AtomicStringTable::AtomicStringTable() { namespace {
for (StringImpl* string : StringImpl::AllStaticStrings().Values())
Add(string);
}
AtomicStringTable::~AtomicStringTable() {
for (StringImpl* string : table_) {
if (!string->IsStatic()) {
DCHECK(string->IsAtomic());
string->SetIsAtomic(false);
}
}
}
void AtomicStringTable::ReserveCapacity(unsigned size) {
table_.ReserveCapacityForSize(size);
}
template <typename T, typename HashTranslator>
scoped_refptr<StringImpl> AtomicStringTable::AddToStringTable(const T& value) {
HashSet<StringImpl*>::AddResult add_result =
table_.AddWithTranslator<HashTranslator>(value);
// If the string is newly-translated, then we need to adopt it.
// The boolean in the pair tells us if that is so.
return add_result.is_new_entry ? base::AdoptRef(*add_result.stored_value)
: *add_result.stored_value;
}
// TODO(ajwong): consider replacing with a span in the future.
template <typename CharacterType> template <typename CharacterType>
struct HashTranslatorCharBuffer { struct HashTranslatorCharBuffer {
const CharacterType* s; const CharacterType* s;
...@@ -148,6 +122,85 @@ struct HashAndUTF8CharactersTranslator { ...@@ -148,6 +122,85 @@ struct HashAndUTF8CharactersTranslator {
} }
}; };
struct StringViewLookupTranslator {
static unsigned GetHash(const StringView& buf) {
StringImpl* shared_impl = buf.SharedImpl();
if (LIKELY(shared_impl))
return shared_impl->GetHash();
if (buf.Is8Bit()) {
return StringHasher::ComputeHashAndMaskTop8Bits(buf.Characters8(),
buf.length());
} else {
return StringHasher::ComputeHashAndMaskTop8Bits(buf.Characters16(),
buf.length());
}
}
static bool Equal(StringImpl* const& str, const StringView& buf) {
return *str == buf;
}
};
struct LowercaseStringViewLookupTranslator {
template <typename CharType>
static UChar ToASCIILowerUChar(CharType ch) {
return ToASCIILower(ch);
}
static unsigned GetHash(const StringView& buf) {
// If possible, use cached hash if the string is lowercased.
StringImpl* shared_impl = buf.SharedImpl();
if (LIKELY(shared_impl && buf.IsLowerASCII()))
return shared_impl->GetHash();
if (buf.Is8Bit()) {
return StringHasher::ComputeHashAndMaskTop8Bits<LChar,
ToASCIILowerUChar<LChar>>(
buf.Characters8(), buf.length());
} else {
return StringHasher::ComputeHashAndMaskTop8Bits<UChar,
ToASCIILowerUChar<UChar>>(
buf.Characters16(), buf.length());
}
}
static bool Equal(StringImpl* const& str, const StringView& buf) {
return EqualIgnoringASCIICase(StringView(str), buf);
}
};
} // namespace
AtomicStringTable::AtomicStringTable() {
for (StringImpl* string : StringImpl::AllStaticStrings().Values())
Add(string);
}
AtomicStringTable::~AtomicStringTable() {
for (StringImpl* string : table_) {
if (!string->IsStatic()) {
DCHECK(string->IsAtomic());
string->SetIsAtomic(false);
}
}
}
void AtomicStringTable::ReserveCapacity(unsigned size) {
table_.ReserveCapacityForSize(size);
}
template <typename T, typename HashTranslator>
scoped_refptr<StringImpl> AtomicStringTable::AddToStringTable(const T& value) {
HashSet<StringImpl*>::AddResult add_result =
table_.AddWithTranslator<HashTranslator>(value);
// If the string is newly-translated, then we need to adopt it.
// The boolean in the pair tells us if that is so.
return add_result.is_new_entry ? base::AdoptRef(*add_result.stored_value)
: *add_result.stored_value;
}
scoped_refptr<StringImpl> AtomicStringTable::Add(const UChar* s, scoped_refptr<StringImpl> AtomicStringTable::Add(const UChar* s,
unsigned length) { unsigned length) {
if (!s) if (!s)
...@@ -221,6 +274,67 @@ scoped_refptr<StringImpl> AtomicStringTable::AddUTF8( ...@@ -221,6 +274,67 @@ scoped_refptr<StringImpl> AtomicStringTable::AddUTF8(
HashAndUTF8CharactersTranslator>(buffer); HashAndUTF8CharactersTranslator>(buffer);
} }
AtomicStringTable::WeakResult AtomicStringTable::WeakFindSlow(
StringImpl* string) {
DCHECK(string->length());
const auto& it = table_.find(string);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFindSlow(
const StringView& string) {
DCHECK(string.length());
const auto& it = table_.Find<StringViewLookupTranslator>(string);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFindLowercasedSlow(
const StringView& string) {
DCHECK(string.length());
const auto& it = table_.Find<LowercaseStringViewLookupTranslator>(string);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFind(const LChar* chars,
unsigned length) {
if (!chars)
return WeakResult();
// Mirror the empty logic in Add().
if (!length)
return WeakResult(StringImpl::empty_);
LCharBuffer buffer = {chars, length};
const auto& it = table_.Find<LCharBufferTranslator>(buffer);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
AtomicStringTable::WeakResult AtomicStringTable::WeakFind(const UChar* chars,
unsigned length) {
if (!chars)
return WeakResult();
// Mirror the empty logic in Add().
if (!length)
return WeakResult(StringImpl::empty_);
UCharBuffer buffer = {chars, length};
const auto& it = table_.Find<UCharBufferTranslator>(buffer);
if (it == table_.end())
return WeakResult();
return WeakResult(*it);
}
void AtomicStringTable::Remove(StringImpl* string) { void AtomicStringTable::Remove(StringImpl* string) {
DCHECK(string->IsAtomic()); DCHECK(string->IsAtomic());
auto iterator = table_.find(string); auto iterator = table_.find(string);
......
...@@ -46,6 +46,78 @@ class WTF_EXPORT AtomicStringTable final { ...@@ -46,6 +46,78 @@ class WTF_EXPORT AtomicStringTable final {
scoped_refptr<StringImpl> AddUTF8(const char* characters_start, scoped_refptr<StringImpl> AddUTF8(const char* characters_start,
const char* characters_end); const char* characters_end);
// Returned as part of the WeakFind() APIs below. Represents the result of
// the non-creating lookup within the AtomicStringTable. See the WeakFind()
// documentation for a description of how it can be used.
class WeakResult {
public:
WeakResult() = default;
explicit WeakResult(StringImpl* str)
: ptr_value_(reinterpret_cast<uintptr_t>(str)) {
CHECK(!str || str->IsAtomic() || str == StringImpl::empty_);
}
bool operator==(const AtomicString& s) const { return *this == s.Impl(); }
bool operator==(const String& s) const { return *this == s.Impl(); }
bool operator==(const StringImpl* str) const {
return reinterpret_cast<uintptr_t>(str) == ptr_value_;
}
bool IsNull() const { return ptr_value_ != 0; }
private:
// Contains the pointer a string in a non-deferenceable form. Do NOT cast
// back to a StringImpl and dereference. The object may no longer be alive.
uintptr_t ptr_value_ = 0;
};
// Checks for existence of a string in the AtomicStringTable without
// unnecessarily creating an AtomicString. Useful to optimize fast-path
// non-existence checks inside collections of AtomicStrings.
//
// Specifically, if WeakFind() returns an IsNull() WeakResult, then a
// collection search can be skipped because the AtomicString cannot exist
// in the collection. If WeakFind() returns a non-null WeakResult, then
// assuming the target collection has no concurrent access, this lookup
// can be reused to check for existence in the collection without
// requiring either an AtomicString collection or another lookup within
// the AtomicStringTable.
WeakResult WeakFind(StringImpl* string) {
// Mirror the empty logic in Add().
if (UNLIKELY(!string->length()))
return WeakResult(StringImpl::empty_);
if (LIKELY(string->IsAtomic()))
return WeakResult(string);
return WeakFindSlow(string);
}
WeakResult WeakFind(const StringView& string) {
// Mirror the empty logic in Add().
if (UNLIKELY(!string.length()))
return WeakResult(StringImpl::empty_);
if (LIKELY(string.IsAtomic()))
return WeakResult(string.SharedImpl());
return WeakFindSlow(string);
}
WeakResult WeakFind(const LChar* chars, unsigned length);
WeakResult WeakFind(const UChar* chars, unsigned length);
WeakResult WeakFindLowercased(const StringView& string) {
// Mirror the empty logic in Add().
if (UNLIKELY(!string.length()))
return WeakResult(StringImpl::empty_);
if (LIKELY(string.IsAtomic() && string.IsLowerASCII()))
return WeakResult(string.SharedImpl());
return WeakFindLowercasedSlow(string);
}
// This is for ~StringImpl to unregister a string before destruction since // This is for ~StringImpl to unregister a string before destruction since
// the table is holding weak pointers. It should not be used directly. // the table is holding weak pointers. It should not be used directly.
void Remove(StringImpl*); void Remove(StringImpl*);
...@@ -54,6 +126,10 @@ class WTF_EXPORT AtomicStringTable final { ...@@ -54,6 +126,10 @@ class WTF_EXPORT AtomicStringTable final {
template <typename T, typename HashTranslator> template <typename T, typename HashTranslator>
inline scoped_refptr<StringImpl> AddToStringTable(const T& value); inline scoped_refptr<StringImpl> AddToStringTable(const T& value);
WeakResult WeakFindSlow(StringImpl*);
WeakResult WeakFindSlow(const StringView&);
WeakResult WeakFindLowercasedSlow(const StringView& string);
HashSet<StringImpl*> table_; HashSet<StringImpl*> table_;
DISALLOW_COPY_AND_ASSIGN(AtomicStringTable); DISALLOW_COPY_AND_ASSIGN(AtomicStringTable);
......
...@@ -196,8 +196,9 @@ class StringHasher { ...@@ -196,8 +196,9 @@ class StringHasher {
} }
private: private:
// The StringHasher works on UChar so all converters should normalize input
// data into being a UChar.
static UChar DefaultConverter(UChar character) { return character; } static UChar DefaultConverter(UChar character) { return character; }
static UChar DefaultConverter(LChar character) { return character; } static UChar DefaultConverter(LChar character) { return character; }
unsigned AvalancheBits() const { unsigned AvalancheBits() const {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment